setwd("/Users/spagnuolo/Desktop/Reuter et el. 2025_final/Raw_data/Data_sets_Phage_titers")
## General packages required for analysis of all experiments
### Data organization and structuring
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2
## ──
## ✔ ggplot2 3.4.4 ✔ purrr 1.0.2
## ✔ tibble 3.2.1 ✔ stringr 1.5.1
## ✔ readr 2.1.2 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ car::recode() masks dplyr::recode()
## ✖ purrr::some() masks car::some()
### Data visualization
library(ggplot2)
library(gridExtra)
##
## Attaching package: 'gridExtra'
##
## The following object is masked from 'package:dplyr':
##
## combine
library(ggthemes)
library(plotrix)
library(scales) # for trans_beaks when axis is log-transformed
##
## Attaching package: 'scales'
##
## The following object is masked from 'package:plotrix':
##
## rescale
##
## The following object is masked from 'package:purrr':
##
## discard
##
## The following object is masked from 'package:readr':
##
## col_factor
library(ggpubr)
# Experiment 3: Adsorption constant analysis
library(multcomp) # Pairwise comparison
## Loading required package: mvtnorm
## Loading required package: survival
## Loading required package: TH.data
## Loading required package: MASS
##
## Attaching package: 'MASS'
##
## The following object is masked from 'package:dplyr':
##
## select
##
##
## Attaching package: 'TH.data'
##
## The following object is masked from 'package:MASS':
##
## geyser
library(lme4) # Linear mixed model
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
##
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
# Experiment 4: Images analysis plaques
library (viridis)
## Loading required package: viridisLite
##
## Attaching package: 'viridis'
##
## The following object is masked from 'package:scales':
##
## viridis_pal
# Experiment 6: Decay analysis
library(stringr)
Code was used to monitor MOI input for each transfer.
READ ME Specification of columns in data set
Transfer (T): Individual three-hour transfers
Line (L): 5 selection lines started at the beginning of the experiment from five individual large plaques of ancestral, large-plaque-forming phage PhiX174
CFU: colony-forming units of E. coli C after 2 hours of growth to exponential phase. CFU count of T3 (644 CFU) was acquired by counting a quarter of the plate (161 CFU), which was multiplied by a factor 4
Dilution_CFU: plating dilution used to plate the colonies counted; 100 µl of this dilution was plated unless stated otherwise
PFU: Plaque-forming units counted on plaque assay plate. Plaque assays were done for the phage lysate extracted after each transfer by chloroform extraction
Dilution_PFU: plating dilution used to plate phage plaques. 100 µl of this dilution was plated unless stated otherwise
df<-read.csv("experiment_1_3_h_transfers.csv")
# df should have 28 observations of 6 variables
str(df) # structure of the columns
## 'data.frame': 28 obs. of 6 variables:
## $ Transfer : chr "T1" "T1" "T1" "T1" ...
## $ Line : chr "L1" "L2" "L3" "L4" ...
## $ CFU : int 76 76 76 76 76 98 98 98 98 98 ...
## $ Dilution_CFU: num 1e+05 1e+05 1e+05 1e+05 1e+05 1e+05 1e+05 1e+05 1e+05 1e+05 ...
## $ PFU : int 120 21 238 88 44 21 26 89 19 132 ...
## $ Dilution_PFU: num 1e+07 1e+07 1e+07 1e+07 1e+07 1e+07 1e+07 1e+07 1e+07 1e+07 ...
summary(df) # summary of the columns
## Transfer Line CFU Dilution_CFU
## Length:28 Length:28 Min. : 76.0 Min. : 10000
## Class :character Class :character 1st Qu.: 98.0 1st Qu.:100000
## Mode :character Mode :character Median :150.0 Median :100000
## Mean :248.3 Mean : 83929
## 3rd Qu.:373.0 3rd Qu.:100000
## Max. :644.0 Max. :100000
## PFU Dilution_PFU
## Min. : 19.00 Min. : 10000000
## 1st Qu.: 30.50 1st Qu.: 10000000
## Median : 63.50 Median : 10000000
## Mean : 66.82 Mean : 19642857
## 3rd Qu.: 87.25 3rd Qu.: 10000000
## Max. :238.00 Max. :100000000
# Turn Transfer and Line into factors
df$Transfer=as.factor(df$Transfer)
df$Line=as.factor(df$Line)
# Calculate CFU per ml by multiplying CFU counts and Dilution factor and multiply by an additional factor 10 to get CFU per ml because 100 µl of the plating dilution "Dilution_CFU" was used for plating
df$CFU_ml=df$CFU*df$Dilution_CFU*10
# Calculate PFU per ml by multiplying PFU counts and Dilution factor and multiply by an additional factor 10 to get PFU per ml because 100 µl of the plating dilution "Dilution_PFU" was used for the plaque assay
df$PFU_ml=df$PFU*df$Dilution_PFU*10
# Bacteria concentration
a=ggplot(df,aes(Transfer,CFU_ml))+
geom_point()+ # dot plot
xlab("Transfer")+ # label of x-axis
ylab("E.coli C concentration (CFU/ml)")+ # label of y-axis
ggtitle("Bacteria concentration")+ # plot title
theme_bw() #plot layout
a
# Virus concentration for each selection line separately until first small plaques were detected (comments on plot b also apply to plots c-f)
## Selection line 1
b=ggplot(df[df$Line=="L1",],aes(Transfer,PFU_ml))+
geom_point(color="blue")+ # dot plot
scale_y_continuous(trans="log10", limits=c(1e+09,6e+010))+ # log transformed y-axis
xlab("Transfer")+ # label of x-axis
ylab("PhiX174 concentration (PFU/ml)")+ #label of y-axis
ggtitle("Selection line L1")+ # plot title
theme_bw() # plot layout
## Selection line 2
c=ggplot(df[df$Line=="L2",],aes(Transfer,PFU_ml))+
geom_point(color="purple")+
scale_y_continuous(trans="log10", limits=c(1e+09,6e+010))+
xlab("Transfer")+
ylab("PhiX174 concentration (PFU/ml)")+
ggtitle("Selection line L2")+
theme_bw()
## Selection line 3
d=ggplot(df[df$Line=="L3",],aes(Transfer,PFU_ml))+
geom_point(color="green")+
scale_y_continuous(trans="log10", limits=c(1e+09,6e+010))+
xlab("Transfer")+
ylab("PhiX174 concentration (PFU/ml)")+
ggtitle("Selection line L3")+
theme_bw()
## Selection line 4
e=ggplot(df[df$Line=="L4",],aes(Transfer,PFU_ml))+
geom_point(color="red")+
scale_y_continuous(trans="log10", limits=c(1e+09,6e+10))+
xlab("Transfer")+
ylab("PhiX174 concentration (PFU/ml)")+
ggtitle("Selection line L4")+
theme_bw()
## Selection line 5
f=ggplot(df[df$Line=="L5",],aes(Transfer,PFU_ml))+
geom_point(color="orange")+
scale_y_continuous(trans="log10", limits=c(1e+09,6e+010))+
xlab("Transfer")+
ylab("PhiX174 concentration (PFU/ml)")+
ggtitle("Selection line L5")+
theme_bw()
# Combine phage concentration plots for all selection lines in one plot
grid.arrange(b,c,d,e,f)
Code used to generate Fig. S2A in the manuscript
READ ME Specification of columns in data set
Line (L): Five selection lines started at the beginning of the experiment from five individual large plaques of ancestral PhiX174
Genotype: Genotype of the small plaque mutant isolated from the 3-h regime, with the single-point mutation indicated as F[for F protein] and in the brackets the mutation in the protein with old amino acid, amino acid position, and new amino acid
Transfer_number (T): Number of 3-h transfers required to isolate one isogenic, heritable small plaque
Total_PFU_ml: Total Phage concentration in PFU per ml for the transfer at which the first small plaques were observed. Column corresponds to the PFU_ml column of data set “experiment_1_3_h_transfers.csv” (see above) was used for the first transfer at which the first small plaque mutant could be isolated from a plaque assay plate - For Line L1 the first small plaques were isolated after transfer T6 - For Line L2 the first small plaques were isolated after transfer T7 - For Line L3 the first small plaques were isolated after transfer T5 - For Line L4 the first small plaques were isolated after transfer T4 - For Line L5 the first small plaques were isolated after transfer T6
Small_PFU_ml: Estimated concentration of small plaques of the “Total_PFU_ml”
# Create data frame with number of transfers required to get first small plaques
df=read.csv("experiment_1_3_h_transfers_small_plaque_frequency.csv")
# df should have 5 observations of 5 variables
str(df) #structure of the dataset
## 'data.frame': 5 obs. of 5 variables:
## $ Line : chr "L1" "L2" "L3" "L4" ...
## $ Genotype : chr "F(G322D)" "F(G322D)" "F(M213I)" "F(S427*)" ...
## $ Transfer_number: int 6 7 5 4 6
## $ Total_PFU_ml : num 1.9e+10 8.8e+09 7.2e+09 5.8e+09 8.9e+09
## $ Small_PFU_ml : num 1e+09 1e+08 1e+08 1e+08 1e+08
# Convert Line and Genotype into factors
df$Genotype=as.factor(df$Genotype)
df$Line=as.factor(df$Line)
# Create a new column for the frequency of small plaques in %
df$Frequency=NA
# Calculate the frequency in %
df$Frequency=(df$Small_PFU_ml)/(df$Total_PFU_ml*0.01)
# show frequencies
print(df$Frequency)
## [1] 5.263158 1.136364 1.388889 1.724138 1.123596
# Code to get the x-ticks in two lines including selection line AND genotype
addline_format <- function(x,...){
gsub('\\s','\n',x)
}
# Visualize results
x=ggplot(df,aes(x=reorder(Line, -Transfer_number),y=Transfer_number, group=Genotype, fill=Genotype))+
geom_col()+ # bar chart
coord_flip()+ # flip x and y axis
scale_fill_manual(values=c("#c6d325","#CC97A7", "#ef7c00"))+ # fill colors based on genotype
scale_x_discrete(limits=c("L2","L1","L5","L3","L4"),
labels=addline_format(c("F(G322D) L2","F(G322D) L1","F(G322D) L5","F(M213I) L3","F(S427*) L4")))+ # change y-axis labeling (x command because axes are flipped)
annotate("text",x=5, y=1,
label = "1.72% small plaques", size = 5)+ # add frequency Line 4: 1.72%
annotate("text",x=4, y=1,
label = "1.39% small plaques", size = 5)+ # add frequency Line 3: 1.39%
annotate("text",x=3, y=1,
label = "1.12% small plaques", size = 5)+ # add frequency Line 5: 1.12%
annotate("text",x=2, y=1,
label = "5.26% small plaques", size = 5)+ # add frequency Line 1: 5.26%
annotate("text",x=1, y=1,
label = "1.14% small plaques", size = 5)+ # add frequency Line 2: 1.14%
ylab("Number of 3-h transfer")+ # label x axis (after flipping)
xlab("Genotype")+ # label y-axis (after flipping)
theme_bw()+ # plot layout
theme(legend.position="none")+ # not legend
theme(axis.text=element_text(size=12),
axis.title=element_text(size=14)) # font size
x
# Same image as high resolution jpeg file
# ggsave("mut_freq_3h.jpeg", plot = x, dpi = 600, width = 10, height = 6)
Code was used to monitor MOI input for each transfer.
READ ME Specification of columns in data set
Passage (P): every passage day consisted of four consecutive 30-min transfers (T1 to T4) followed by one 3-hour-long amplification step (T5) after which phages were extracted by chloroform-extraction. P2 had to be repeated due to a failure of producing detectable phage particles. Passage P2b was worked and was used as the second passage day. In total, 10 passage days (40 30-minute transfers + 10 amplification steps) were done
Transfer (T): Individual transfers + amplification step done within one passage day
Passage_ID: Individual ID for each single passage+transfer ranging from 1-50
Line (L): 5 selection lines started at the beginning of the experiment from five individual large plaques of ancestral, large-plaque-forming phage PhiX174
CFU: colony-forming units after 2 hours of E. Coli C growth to get exponential overday culture
Dilution_CFU: plating dilution used to plate the colonies counted; 100 µl of this dilution was plated unless stated otherwise
CFU_ml: colony-forming units per milliliter calculated by CFU* Dilution_CFU*10. Factor 10 for multiplication was used because the plating volume was 100 µl
PFU: Plaque-forming units counted on the plaque assay plate (only done for transfer 5 of each passage)
Dilution_PFU: plating dilution used to plate phage plaques. 100 µl of this dilution was plated unless stated otherwise
PFU_ml: Plaque-forming units per millilitre calculated by PFU* Dilution_PFU*10. Factor 10 for multiplication was used because the plating volume was 100 µl
Spot_assay_dilution: 10-fold dilution of phages from transfers T1 to T4, at which a spot on a lawn of ancestral, phage-sensitive E. Coli C could be detected. (Only done for transfers 1 to 4 for each passage)
Output_phage_ml: Phage concentration in phages per millilitre calculated after each transfer. For transfers T1 to T4, by calculating Spot_assay_dilution*500. Factor 500 because 2 µl of phage lysate was plated. For transfer T5, concentration calculation as described under column “PFU_ml”.
-Inoculation_volume_ml: Volume of phage lysate put into each transfer. For transfers T2 to T5 on each passage day, 100 µl of phage+bacteria mix from the previous transfer was used. For the first transfer of each passage day, the phage input volume was calculated to get an MOI input of ~0.1.
Real_virus_concentration: Real phage concentration put into each transfer based on Spot assay and PFU counts
Real_MOI: Real MOI input calculation for each transfer based on real virus concentration from column “Real_virus_concentration” and bacteria concentration in relation to colony-forming units per ml determined by plating bacteria colonies (recorded in column “CFU_ml”).
df=read.csv("experiment_2_30_min_transfers.csv")
# df contains 275 observations of 15 variables
str(df) # structure of columns
## 'data.frame': 275 obs. of 15 variables:
## $ Passage : chr "P1" "P1" "P1" "P1" ...
## $ Transfer : chr "T1" "T1" "T1" "T1" ...
## $ Passage_ID : int 1 1 1 1 1 2 2 2 2 2 ...
## $ Line : chr "L1" "L2" "L3" "L4" ...
## $ CFU : int 212 212 212 212 212 1260 1260 1260 1260 1260 ...
## $ Dilution_CFU : num 1e+05 1e+05 1e+05 1e+05 1e+05 1e+04 1e+04 1e+04 1e+04 1e+04 ...
## $ CFU_ml : num 2.12e+08 2.12e+08 2.12e+08 2.12e+08 2.12e+08 1.26e+08 1.26e+08 1.26e+08 1.26e+08 1.26e+08 ...
## $ PFU : int NA NA NA NA NA NA NA NA NA NA ...
## $ Dilution_PFU : num NA NA NA NA NA NA NA NA NA NA ...
## $ PFU_ml : num NA NA NA NA NA NA NA NA NA NA ...
## $ Spot_assay_dilution : num 1e+05 1e+05 1e+05 1e+04 1e+04 1e+05 1e+07 1e+05 1e+05 1e+07 ...
## $ Output_phage_ml : num 5e+07 5e+07 5e+07 5e+06 5e+06 5e+07 5e+09 5e+07 5e+07 5e+09 ...
## $ Inoculation_volume_ml : num 0.0135 0.0135 0.0135 0.0135 0.0135 0.1 0.1 0.1 0.1 0.1 ...
## $ Real_virus_concentration: num 9990000 9990000 9990000 9990000 9990000 1000000 1000000 1000000 100000 100000 ...
## $ Real_MOI : num 0.0471 0.0471 0.0471 0.0471 0.0471 ...
summary(df) # summary of the columns
## Passage Transfer Passage_ID Line
## Length:275 Length:275 Min. : 1.00 Length:275
## Class :character Class :character 1st Qu.:10.00 Class :character
## Mode :character Mode :character Median :23.00 Mode :character
## Mean :23.91
## 3rd Qu.:37.00
## Max. :50.00
##
## CFU Dilution_CFU CFU_ml PFU
## Min. : 88.0 Min. : 10000 Min. : 68000000 Min. : 16.0
## 1st Qu.: 215.0 1st Qu.: 10000 1st Qu.:112000000 1st Qu.: 44.0
## Median :1116.0 Median : 10000 Median :140000000 Median : 98.5
## Mean : 985.2 Mean : 36182 Mean :142541818 Mean :103.1
## 3rd Qu.:1464.0 3rd Qu.:100000 3rd Qu.:170000000 3rd Qu.:145.8
## Max. :1944.0 Max. :100000 Max. :249000000 Max. :255.0
## NA's :225
## Dilution_PFU PFU_ml Spot_assay_dilution Output_phage_ml
## Min. :1.0e+07 Min. :7.300e+09 Min. : 10000 Min. :5.000e+06
## 1st Qu.:1.0e+07 1st Qu.:1.578e+10 1st Qu.: 100000 1st Qu.:5.000e+07
## Median :1.0e+08 Median :3.200e+10 Median : 100000 Median :5.000e+08
## Mean :6.4e+07 Mean :5.099e+10 Mean : 717746 Mean :9.984e+09
## 3rd Qu.:1.0e+08 3rd Qu.:5.675e+10 3rd Qu.: 1000000 3rd Qu.:5.000e+08
## Max. :1.0e+08 Max. :2.550e+11 Max. :10000000 Max. :2.550e+11
## NA's :225 NA's :225 NA's :62 NA's :12
## Inoculation_volume_ml Real_virus_concentration Real_MOI
## Min. :0.0001955 Min. : 100000 Min. :0.000558
## 1st Qu.:0.1000000 1st Qu.: 1000000 1st Qu.:0.007081
## Median :0.1000000 Median : 9990000 Median :0.044467
## Mean :0.0810217 Mean : 8947948 Mean :0.064543
## 3rd Qu.:0.1000000 3rd Qu.: 10000000 3rd Qu.:0.095495
## Max. :0.1000000 Max. :100000000 Max. :0.896057
## NA's :7 NA's :7
# Passage P2 failed to produce detectable infectious particles after Transfer 3. Therefore all 5 transfers of P2 were excluded from analysis. P2 was repeated as "P2b" for all five transfers
df<-df[df$Passage!="P2",]
# Set Block, Passage, Transfer, Line to factors
df$Passage<-as.factor(df$Passage)
df$Transfer<-as.factor(df$Transfer)
df$Line<-as.factor(df$Line)
# Set color pallet
cbPalette <- c("#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#999999")
# Set level order of passages for plotting
df$Passage <- factor(df$Passage , levels=c("P1", "P2b", "P3", "P4", "P5", "P6", "P7", "P8", "P9", "P10"))
# Plot bacteria concentration for each transfer
a<-ggplot(df,aes(Passage,CFU_ml, color=Transfer))+
geom_point()+ # dot plot
scale_color_manual(values=cbPalette)+ # color settings
xlab("Passage day")+ # label x-axis
ylab("CFU/ml")+ # label y-axis
theme_base()+ # plot layout
theme(text = element_text(size = 9),
axis.title = element_text(size =9 ),
axis.text.x = element_text(angle=90,size = 9),
legend.position = "bottom") # font size and legend position
a
# Visualize all MOI's in one plot (e.g. violin plot with different lines)
ggplot(df,aes(Line, Real_MOI, fill=Line))+
geom_violin()+ # violin plot
scale_y_continuous(trans="log10")+ # log transformation of y-axis
theme_bw() # plot layout
Note –> Phages after each 5th transfer T5 is accurate (after 3h incubation), was isolated using chloroform, the other 4x30 min titers were determined after filtration using a spot test, which was too inaccurate. To quantify correct free phage titer after 30-min transfers, see Experiment 6: Free phage titer quantification
# Phage concentration for individual selection lines
## Subset dataframe to only transfer 5 for each passage
df1<-df[df$Transfer=="T5",]
# Plot phage titers for each selection line in an individual plot (comments on plot h apply also to plots i-l)
## Line 1
h<-ggplot(df1[df1$Line=="L1",],aes(Passage,Output_phage_ml, group=Transfer, color=Transfer))+
geom_point(position=position_jitterdodge())+ # dot plot with jitter to avoid point overlapping
scale_color_manual(values = cbPalette)+ # color setting
scale_x_discrete(limits = c("P1","P2b","P3","P4","P5","P6","P7","P8","P9","P10"))+ # order of passages
xlab("Passage day")+ # label x-axis
ylab("Virus concentration (viruses/ml)")+ # label y-axis
ggtitle(("Selection line 1"))+ # plot title
theme_base()+ # plot layout
theme(text = element_text(size = 9),
axis.title = element_text(size =9 ),
axis.text.x = element_text(angle=90,size = 9),
legend.position = "none") # font size and legend position
h
## Line 2
i<-ggplot(df1[df1$Line=="L2",],aes(Passage,Output_phage_ml, group=Transfer, color=Transfer))+
geom_point(position=position_jitterdodge())+
scale_color_manual(values = cbPalette)+
scale_x_discrete(limits = c("P1","P2b","P3","P4","P5","P6","P7","P8","P9","P10"))+
xlab("Passage day")+
ylab("Virus concentration (viruses/ml)")+
ggtitle(("Selection line 2"))+
theme_base()+
theme(text = element_text(size = 9),
axis.title = element_text(size =9 ),
axis.text.x = element_text(angle=90,size = 9),
legend.position = "none")
i
# Line 3
j<-ggplot(df1[df1$Line=="L3",],aes(Passage,Output_phage_ml, group=Transfer, color=Transfer))+
geom_point(position=position_jitterdodge())+
scale_color_manual(values = cbPalette)+
scale_x_discrete(limits = c("P1","P2b","P3","P4","P5","P6","P7","P8","P9","P10"))+
xlab("Passage day")+
ylab("Virus concentration (viruses/ml)")+
ggtitle(("Selection line 3"))+
theme_base()+
theme(text = element_text(size = 9),
axis.title = element_text(size =9 ),
axis.text.x = element_text(angle=90,size = 9),
legend.position = "none")
j
# Line 4
k<-ggplot(df1[df1$Line=="L4",],aes(Passage,Output_phage_ml, group=Transfer, color=Transfer))+
geom_point(position=position_jitterdodge())+
scale_color_manual(values = cbPalette)+
scale_x_discrete(limits = c("P1","P2b","P3","P4","P5","P6","P7","P8","P9","P10"))+
xlab("Passage day")+
ylab("Virus concentration (viruses/ml)")+
ggtitle(("Selection line 4"))+
theme_base()+
theme(text = element_text(size = 9),
axis.title = element_text(size =9 ),
axis.text.x = element_text(angle=90,size = 9),
legend.position = "none")
k
# Line 5
l<-ggplot(df1[df1$Line=="L5",],aes(Passage,Output_phage_ml, group=Transfer, color=Transfer))+
geom_point(position=position_jitterdodge())+
scale_color_manual(values = cbPalette)+
scale_x_discrete(limits = c("P1","P2b","P3","P4","P5","P6","P7","P8","P9","P10"))+
xlab("Passage day")+
ylab("Virus concentration (viruses/ml)")+
ggtitle(("Selection line 5"))+
theme_base()+
theme(text = element_text(size = 9),
axis.title = element_text(size =9 ),
axis.text.x = element_text(angle=90,size = 9),
legend.position = "none")
l
# Combined plots of all lines
grid.arrange(h,i,j,k,l)
Code used to generate Fig. S3A in the manuscript
READ ME Specification of columns in data set
Genotype: Phage genotypes for which the adsorption assay was done: - wild type (WT) - small-plaque mutant G322D evolved in Experiment 1: 3-hour transfers - small-plaque mutant S427* evolved in Experiment 1: 3-hour transfers - large-plaque mutant T101A evolved in Experiment 2: 30-min transfers
Run: Repeated experimental runs, different runs are biological replicates, ran on independent bacterial cultures started from independent colonies. Different number of runs for each phage genotype - Wild type: Runs 1,2,3,4,5,6,7 - T101A: Runs 5,6,7, - G322D: Runs 1,2,3,4,5,6,7 - S427*: Runs 1.,2,3,4,5,7
Sampling_time_min: Sampling time point in minutes during the adsorption period from time point 2 minutes (initiation of adsorption) to 6 minutes in 2-minute time intervals.
PFU_1: Plaque-forming unit counts (PFU) for the first plating of triplicate plating for phage lysate for each sampling time point
PFU_2: Plaque-forming unit counts (PFU) for the second plating of triplicate plating for phage lysate for each sampling time point
PFU_3: Plaque-forming unit counts (PFU) for third plating of triplicate plating for phage lysate for each sampling time point
Median_PFU: Median PFU counts from triplicate plating PFU_1, PFU_2, PFU_3
Predilution: After sampling at the adsorption time point 2 min to 6 min, 500 µl of sample was diluted in 4.5 ml of unsupplemented LB-medium cooled on ice to prevent infection cycle completion and secondary adsorptions, resulting in a 10-fold dilution of the sample.
Dilution_plating: Plating dilution used to plate 100 µl of pre-diluted phage lysate from each sampling time point.
Dilution_in_ml: Dilution step to calculate phage concentration in PFU/ml. Factor is 10x because 100 µl of phage lysate was used for plating.
PFU_ml: Phage concentration in plaque-forming units per ml (PFU/ml), calculated as Median_PFU * Predilution * Dilution_plating * Dilution_in_ml
CFU: Colony-forming units of wild-type E. coli C at the beginning of the adsorption period
Dilution_CFU: Plating dilution used to plate CFUs; 100 µl of this dilution was plated for colony counts unless stated otherwise.
Bacteria_CFU_ml: Bacteria concentration in colony-forming units per ml CFU/ml) at the start of the adsorption period. Calculated as counting colony-forming units (CFU) * Dilution_CFU * 10 (because 100 µl of Dilution_CFU were used for plating)
df<-read.csv("experiment_3_adsorption_assay.csv")
# -> 69 observations of 14 variables
str(df)# structure of columns
## 'data.frame': 69 obs. of 14 variables:
## $ Genotype : chr "G322D" "G322D" "G322D" "G322D" ...
## $ Run : int 1 1 1 2 2 2 3 3 3 4 ...
## $ Sampling_time_min: int 2 4 6 2 4 6 2 4 6 2 ...
## $ PFU_1 : int 51 18 22 68 152 36 36 33 42 191 ...
## $ PFU_2 : int 38 18 20 81 146 29 32 68 52 151 ...
## $ PFU_3 : int 49 24 18 72 96 37 15 40 71 129 ...
## $ Median_PFU : int 49 18 20 72 146 36 32 40 52 151 ...
## $ Predilution : int 10 10 10 10 10 10 10 10 10 10 ...
## $ Dilution_plating : num 1000 100 10 1000 100 10 1000 100 10 1000 ...
## $ Dilution_in_ml : int 10 10 10 10 10 10 10 10 10 10 ...
## $ PFU_ml : num 4900000 180000 20000 7200000 1460000 36000 3200000 400000 52000 15100000 ...
## $ CFU : int 202 202 202 121 121 121 251 251 251 221 ...
## $ Dilution_CFU : num 1e+05 1e+05 1e+05 1e+05 1e+05 1e+05 1e+05 1e+05 1e+05 1e+05 ...
## $ CFU_ml : num 2.02e+08 2.02e+08 2.02e+08 1.21e+08 1.21e+08 1.21e+08 2.51e+08 2.51e+08 2.51e+08 2.21e+08 ...
summary(df) # summary of columns
## Genotype Run Sampling_time_min PFU_1
## Length:69 Min. :1.000 Min. :2 Min. : 17.00
## Class :character 1st Qu.:2.000 1st Qu.:2 1st Qu.: 33.00
## Mode :character Median :4.000 Median :4 Median : 57.00
## Mean :4.174 Mean :4 Mean : 71.97
## 3rd Qu.:6.000 3rd Qu.:6 3rd Qu.: 94.00
## Max. :7.000 Max. :6 Max. :261.00
## PFU_2 PFU_3 Median_PFU Predilution
## Min. : 13.00 Min. : 14.00 Min. : 14.00 Min. :10
## 1st Qu.: 37.00 1st Qu.: 40.00 1st Qu.: 39.00 1st Qu.:10
## Median : 61.00 Median : 61.00 Median : 58.00 Median :10
## Mean : 72.94 Mean : 74.13 Mean : 72.75 Mean :10
## 3rd Qu.: 95.00 3rd Qu.: 96.00 3rd Qu.: 91.00 3rd Qu.:10
## Max. :312.00 Max. :367.00 Max. :261.00 Max. :10
## Dilution_plating Dilution_in_ml PFU_ml CFU
## Min. : 1.0 Min. :10 Min. : 2800 Min. :121.0
## 1st Qu.: 10.0 1st Qu.:10 1st Qu.: 47000 1st Qu.:196.0
## Median : 100.0 Median :10 Median : 390000 Median :221.0
## Mean : 190.4 Mean :10 Mean : 1229058 Mean :235.2
## 3rd Qu.: 100.0 3rd Qu.:10 3rd Qu.: 1130000 3rd Qu.:267.0
## Max. :1000.0 Max. :10 Max. :15100000 Max. :391.0
## Dilution_CFU CFU_ml
## Min. :1e+05 Min. :121000000
## 1st Qu.:1e+05 1st Qu.:196000000
## Median :1e+05 Median :221000000
## Mean :1e+05 Mean :235217391
## 3rd Qu.:1e+05 3rd Qu.:267000000
## Max. :1e+05 Max. :391000000
# Convert Genotype and Run into factors
df$Genotype=as.factor(df$Genotype)
df$Run=as.factor(df$Run)
# Log transform free phage concentrations
df$log_PFU_ml=log(df$PFU_ml)
# Comments of plot a also apply to plots b-g
# Run 1
a=ggplot(df[df$Run=="1",],aes(Sampling_time_min,PFU_ml, group=Genotype, color=Genotype))+
geom_point()+ # dot plot
scale_y_continuous(trans="log10")+ # log transformation of y-axis
scale_color_manual(values=c("#c6d325","#ef7c00","#29485d"))+ # set color palette
theme_bw() # plot layout
a
# Run 2
b=ggplot(df[df$Run=="2",],aes(Sampling_time_min,PFU_ml, group=Genotype, color=Genotype))+
geom_point()+
scale_y_continuous(trans="log10")+
scale_color_manual(values=c("#c6d325","#ef7c00","#29485d"))+
theme_bw()
b
# Run 3
c=ggplot(df[df$Run=="3",],aes(Sampling_time_min,PFU_ml, group=Genotype, color=Genotype))+
geom_point()+
scale_y_continuous(trans="log10")+
scale_color_manual(values=c("#c6d325","#ef7c00","#29485d"))+
theme_bw()
c
# Run 4
d=ggplot(df[df$Run=="4",],aes(Sampling_time_min,PFU_ml, group=Genotype, color=Genotype))+
geom_point()+
scale_y_continuous(trans="log10")+
scale_color_manual(values=c("#c6d325","#ef7c00","#29485d"))+
theme_bw()
d
# Run 5
e=ggplot(df[df$Run=="5",],aes(Sampling_time_min,PFU_ml, group=Genotype, color=Genotype))+
geom_point()+
scale_y_continuous(trans="log10")+
scale_color_manual(values=c("#c6d325","#ef7c00","#ae017e","#29485d"))+
theme_bw()
e
# Run 6
f=ggplot(df[df$Run=="6",],aes(Sampling_time_min,PFU_ml, group=Genotype, color=Genotype))+
geom_point()+
scale_y_continuous(trans="log10")+
scale_color_manual(values=c("#c6d325","#ae017e","#29485d"))+
theme_bw()
f
# Run 7
g=ggplot(df[df$Run=="7",],aes(Sampling_time_min,PFU_ml, group=Genotype, color=Genotype))+
geom_point()+
# geom_smooth(method="lm", linetype="dashed", se=F)+
scale_y_continuous(trans="log10")+
scale_color_manual(values=c("#c6d325","#ef7c00","#ae017e","#29485d"))+
theme_bw()
g
# Creat a data set to record slopes and R^2 values for each genotype and run, using subset df to time point 2 as a template
dfslope=df[df$Sampling_time_min==2,]
#-> dfslope should contain 23 observations of 6 variables
# Subset slope data set to only columns Run, Genotype and CFU_ml (Bacteria concentration needed for adsorption constant calculation)
dfslope=dfslope[,c(2,1,14)]
# Create unique identifier combining Genotype and Run
dfslope$ID=paste(dfslope$Run,dfslope$Genotype,sep="_")
# Create columns to record slope and R^2 values
dfslope$slope=NA
dfslope$R2=NA # multiple R^2
# Each model represents an individual experimental run
a=lm(formula=log_PFU_ml~Sampling_time_min,data=df[df$Genotype=="WT" &df$Run==1,])
summary(a) #slope=-0.99138, R^2= 0.9989
##
## Call:
## lm(formula = log_PFU_ml ~ Sampling_time_min, data = df[df$Genotype ==
## "WT" & df$Run == 1, ])
##
## Residuals:
## 49 50 51
## 0.03783 -0.07566 0.03783
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 17.32905 0.14155 122.42 0.0052 **
## Sampling_time_min -0.99138 0.03276 -30.26 0.0210 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.09267 on 1 degrees of freedom
## Multiple R-squared: 0.9989, Adjusted R-squared: 0.9978
## F-statistic: 915.6 on 1 and 1 DF, p-value: 0.02103
dfslope[dfslope$ID=="1_WT",]$slope=- 0.99138
dfslope[dfslope$ID=="1_WT",]$R2= 0.9989
a=lm(formula=log_PFU_ml~Sampling_time_min,data=df[df$Genotype=="WT" &df$Run==2,])
summary(a) #slope=-0.83966 , R^2=0.9997
##
## Call:
## lm(formula = log_PFU_ml ~ Sampling_time_min, data = df[df$Genotype ==
## "WT" & df$Run == 2, ])
##
## Residuals:
## 52 53 54
## 0.01778 -0.03557 0.01778
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 17.40857 0.06654 261.62 0.00243 **
## Sampling_time_min -0.83966 0.01540 -54.52 0.01168 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.04356 on 1 degrees of freedom
## Multiple R-squared: 0.9997, Adjusted R-squared: 0.9993
## F-statistic: 2972 on 1 and 1 DF, p-value: 0.01168
dfslope[dfslope$ID=="2_WT",]$slope=- 0.83966
dfslope[dfslope$ID=="2_WT",]$R2= 0.9997
a=lm(formula=log_PFU_ml~Sampling_time_min,data=df[df$Genotype=="WT" &df$Run==3,])
summary(a) #slope=-0.78892, R^2=0.9984
##
## Call:
## lm(formula = log_PFU_ml ~ Sampling_time_min, data = df[df$Genotype ==
## "WT" & df$Run == 3, ])
##
## Residuals:
## 55 56 57
## -0.03669 0.07337 -0.03669
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 16.26296 0.13727 118.47 0.00537 **
## Sampling_time_min -0.78892 0.03177 -24.83 0.02562 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.08986 on 1 degrees of freedom
## Multiple R-squared: 0.9984, Adjusted R-squared: 0.9968
## F-statistic: 616.6 on 1 and 1 DF, p-value: 0.02562
dfslope[dfslope$ID=="3_WT",]$slope=- 0.78892
dfslope[dfslope$ID=="3_WT",]$R2= 0.9984
a=lm(formula=log_PFU_ml~Sampling_time_min,data=df[df$Genotype=="WT" &df$Run==4,])
summary(a) #slope=-0.9439, R^2=0.944
##
## Call:
## lm(formula = log_PFU_ml ~ Sampling_time_min, data = df[df$Genotype ==
## "WT" & df$Run == 4, ])
##
## Residuals:
## 58 59 60
## 0.2654 -0.5308 0.2654
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 16.1557 0.9931 16.268 0.0391 *
## Sampling_time_min -0.9439 0.2299 -4.106 0.1521
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6501 on 1 degrees of freedom
## Multiple R-squared: 0.944, Adjusted R-squared: 0.888
## F-statistic: 16.86 on 1 and 1 DF, p-value: 0.1521
dfslope[dfslope$ID=="4_WT",]$slope=- 0.9439
dfslope[dfslope$ID=="4_WT",]$R2= 0.944
a=lm(formula=log_PFU_ml~Sampling_time_min,data=df[df$Genotype=="WT" &df$Run==5,])
summary(a) #slope=-0.7034, R^2 0.8892
##
## Call:
## lm(formula = log_PFU_ml ~ Sampling_time_min, data = df[df$Genotype ==
## "WT" & df$Run == 5, ])
##
## Residuals:
## 61 62 63
## 0.2867 -0.5734 0.2867
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 14.3377 1.0728 13.365 0.0475 *
## Sampling_time_min -0.7034 0.2483 -2.833 0.2161
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7023 on 1 degrees of freedom
## Multiple R-squared: 0.8892, Adjusted R-squared: 0.7784
## F-statistic: 8.024 on 1 and 1 DF, p-value: 0.2161
dfslope[dfslope$ID=="5_WT",]$slope=- 0.7034
dfslope[dfslope$ID=="5_WT",]$R2= 0.8892
a=lm(formula=log_PFU_ml~Sampling_time_min,data=df[df$Genotype=="WT" &df$Run==6,])
summary(a) #slope=-0.9422, R^2=0.9631
##
## Call:
## lm(formula = log_PFU_ml ~ Sampling_time_min, data = df[df$Genotype ==
## "WT" & df$Run == 6, ])
##
## Residuals:
## 64 65 66
## 0.2128 -0.4257 0.2128
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 15.3928 0.7964 19.328 0.0329 *
## Sampling_time_min -0.9422 0.1843 -5.112 0.1230
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5214 on 1 degrees of freedom
## Multiple R-squared: 0.9631, Adjusted R-squared: 0.9263
## F-statistic: 26.13 on 1 and 1 DF, p-value: 0.123
dfslope[dfslope$ID=="6_WT",]$slope=- 0.9422
dfslope[dfslope$ID=="6_WT",]$R2= 0.9631
a=lm(formula=log_PFU_ml~Sampling_time_min,data=df[df$Genotype=="WT" &df$Run==7,])
summary(a) #slope=-0.9246, slope=0.9389
##
## Call:
## lm(formula = log_PFU_ml ~ Sampling_time_min, data = df[df$Genotype ==
## "WT" & df$Run == 7, ])
##
## Residuals:
## 67 68 69
## 0.2723 -0.5446 0.2723
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 15.4412 1.0188 15.156 0.0419 *
## Sampling_time_min -0.9246 0.2358 -3.921 0.1590
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.667 on 1 degrees of freedom
## Multiple R-squared: 0.9389, Adjusted R-squared: 0.8779
## F-statistic: 15.37 on 1 and 1 DF, p-value: 0.159
dfslope[dfslope$ID=="7_WT",]$slope=- 0.9246
dfslope[dfslope$ID=="7_WT",]$R2= 0.9389
# Each model represents an individual experimental run
a=lm(formula=log_PFU_ml~Sampling_time_min,data=df[df$Genotype=="G322D" &df$Run==1,])
summary(a) #slope=-1.3753, R^2=0.9867
##
## Call:
## lm(formula = log_PFU_ml ~ Sampling_time_min, data = df[df$Genotype ==
## "G322D" & df$Run == 1, ])
##
## Residuals:
## 1 2 3
## 0.1845 -0.3689 0.1845
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 17.9709 0.6902 26.037 0.0244 *
## Sampling_time_min -1.3753 0.1598 -8.609 0.0736 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4519 on 1 degrees of freedom
## Multiple R-squared: 0.9867, Adjusted R-squared: 0.9734
## F-statistic: 74.11 on 1 and 1 DF, p-value: 0.07362
dfslope[dfslope$ID=="1_G322D",]$slope=- 1.3753
dfslope[dfslope$ID=="1_G322D",]$R2= 0.9867
a=lm(formula=log_PFU_ml~Sampling_time_min,data=df[df$Genotype=="G322D" &df$Run==2,])
summary(a) #slope=-1.3246 , R^2= 0.9499
##
## Call:
## lm(formula = log_PFU_ml ~ Sampling_time_min, data = df[df$Genotype ==
## "G322D" & df$Run == 2, ])
##
## Residuals:
## 4 5 6
## -0.3512 0.7023 -0.3512
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 18.7899 1.3140 14.300 0.0444 *
## Sampling_time_min -1.3246 0.3041 -4.355 0.1437
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8602 on 1 degrees of freedom
## Multiple R-squared: 0.9499, Adjusted R-squared: 0.8998
## F-statistic: 18.97 on 1 and 1 DF, p-value: 0.1437
dfslope[dfslope$ID=="2_G322D",]$slope=- 1.3246
dfslope[dfslope$ID=="2_G322D",]$R2= 0.9499
a=lm(formula=log_PFU_ml~Sampling_time_min,data=df[df$Genotype=="G322D" &df$Run==3,])
summary(a) #slope-1.029916, R^21
##
## Call:
## lm(formula = log_PFU_ml ~ Sampling_time_min, data = df[df$Genotype ==
## "G322D" & df$Run == 3, ])
##
## Residuals:
## 7 8 9
## 0.006537 -0.013074 0.006537
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 17.031956 0.024458 696.4 0.000914 ***
## Sampling_time_min -1.029916 0.005661 -181.9 0.003499 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.01601 on 1 degrees of freedom
## Multiple R-squared: 1, Adjusted R-squared: 0.9999
## F-statistic: 3.31e+04 on 1 and 1 DF, p-value: 0.003499
dfslope[dfslope$ID=="3_G322D",]$slope=- 1.029916
dfslope[dfslope$ID=="3_G322D",]$R2= 1
a=lm(formula=log_PFU_ml~Sampling_time_min,data=df[df$Genotype=="G322D" &df$Run==4,])
summary(a) #slope=-1.3544, R^2=0.888
##
## Call:
## lm(formula = log_PFU_ml ~ Sampling_time_min, data = df[df$Genotype ==
## "G322D" & df$Run == 4, ])
##
## Residuals:
## 10 11 12
## 0.5555 -1.1110 0.5555
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 18.6836 2.0785 8.989 0.0705 .
## Sampling_time_min -1.3544 0.4811 -2.815 0.2173
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.361 on 1 degrees of freedom
## Multiple R-squared: 0.888, Adjusted R-squared: 0.7759
## F-statistic: 7.926 on 1 and 1 DF, p-value: 0.2173
dfslope[dfslope$ID=="4_G322D",]$slope=- 1.3544
dfslope[dfslope$ID=="4_G322D",]$R2= 0.888
a=lm(formula=log_PFU_ml~Sampling_time_min,data=df[df$Genotype=="G322D" &df$Run==5,])
summary(a) #slope=-1.3334, R^2= 0.9876
##
## Call:
## lm(formula = log_PFU_ml ~ Sampling_time_min, data = df[df$Genotype ==
## "G322D" & df$Run == 5, ])
##
## Residuals:
## 13 14 15
## 0.1728 -0.3455 0.1728
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 15.7647 0.6464 24.389 0.0261 *
## Sampling_time_min -1.3334 0.1496 -8.912 0.0711 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4232 on 1 degrees of freedom
## Multiple R-squared: 0.9876, Adjusted R-squared: 0.9751
## F-statistic: 79.43 on 1 and 1 DF, p-value: 0.07114
dfslope[dfslope$ID=="5_G322D",]$slope=- 1.3334
dfslope[dfslope$ID=="5_G322D",]$R2= 0.9876
a=lm(formula=log_PFU_ml~Sampling_time_min,data=df[df$Genotype=="G322D" &df$Run==6,])
summary(a) #slope=-1.3502, R^2=0.9594
##
## Call:
## lm(formula = log_PFU_ml ~ Sampling_time_min, data = df[df$Genotype ==
## "G322D" & df$Run == 6, ])
##
## Residuals:
## 16 17 18
## 0.3208 -0.6415 0.3208
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 16.3173 1.2002 13.60 0.0467 *
## Sampling_time_min -1.3502 0.2778 -4.86 0.1292
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7857 on 1 degrees of freedom
## Multiple R-squared: 0.9594, Adjusted R-squared: 0.9188
## F-statistic: 23.62 on 1 and 1 DF, p-value: 0.1292
dfslope[dfslope$ID=="6_G322D",]$slope=- 1.3502
dfslope[dfslope$ID=="6_G322D",]$R2= 0.9594
a=lm(formula=log_PFU_ml~Sampling_time_min,data=df[df$Genotype=="G322D" &df$Run==7,])
summary(a) #slope=-1.2309, R^2=0.9919
##
## Call:
## lm(formula = log_PFU_ml ~ Sampling_time_min, data = df[df$Genotype ==
## "G322D" & df$Run == 7, ])
##
## Residuals:
## 19 20 21
## 0.1288 -0.2575 0.1288
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 16.0207 0.4818 33.25 0.0191 *
## Sampling_time_min -1.2309 0.1115 -11.04 0.0575 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3154 on 1 degrees of freedom
## Multiple R-squared: 0.9919, Adjusted R-squared: 0.9837
## F-statistic: 121.8 on 1 and 1 DF, p-value: 0.05752
dfslope[dfslope$ID=="7_G322D",]$slope=- 1.2309
dfslope[dfslope$ID=="7_G322D",]$R2= 0.9919
# Each model represents an individual experimental run
a=lm(formula=log_PFU_ml~Sampling_time_min,data=df[df$Genotype=="S427*" &df$Run==1,])
summary(a) #slope-1.2174, R^2= 0.9414
##
## Call:
## lm(formula = log_PFU_ml ~ Sampling_time_min, data = df[df$Genotype ==
## "S427*" & df$Run == 1, ])
##
## Residuals:
## 22 23 24
## 0.3508 -0.7016 0.3508
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 17.1233 1.3125 13.046 0.0487 *
## Sampling_time_min -1.2174 0.3038 -4.007 0.1557
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8592 on 1 degrees of freedom
## Multiple R-squared: 0.9414, Adjusted R-squared: 0.8828
## F-statistic: 16.06 on 1 and 1 DF, p-value: 0.1557
dfslope[dfslope$ID=="1_S427*",]$slope=- 1.2174
dfslope[dfslope$ID=="1_S427*",]$R2= 0.9414
a=lm(formula=log_PFU_ml~Sampling_time_min,data=df[df$Genotype=="S427*" &df$Run==2,])
summary(a) #slope=-1.03485 , R^2=0.9986
##
## Call:
## lm(formula = log_PFU_ml ~ Sampling_time_min, data = df[df$Genotype ==
## "S427*" & df$Run == 2, ])
##
## Residuals:
## 25 26 27
## 0.04534 -0.09069 0.04534
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 17.61483 0.16966 103.83 0.00613 **
## Sampling_time_min -1.03485 0.03927 -26.35 0.02415 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1111 on 1 degrees of freedom
## Multiple R-squared: 0.9986, Adjusted R-squared: 0.9971
## F-statistic: 694.5 on 1 and 1 DF, p-value: 0.02415
dfslope[dfslope$ID=="2_S427*",]$slope=- 1.03485
dfslope[dfslope$ID=="2_S427*",]$R2= 0.9986
a=lm(formula=log_PFU_ml~Sampling_time_min,data=df[df$Genotype=="S427*" &df$Run==3,])
summary(a) #slope=-1.3484, R^2=0.9893
##
## Call:
## lm(formula = log_PFU_ml ~ Sampling_time_min, data = df[df$Genotype ==
## "S427*" & df$Run == 3, ])
##
## Residuals:
## 28 29 30
## 0.1621 -0.3242 0.1621
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 18.0550 0.6066 29.765 0.0214 *
## Sampling_time_min -1.3484 0.1404 -9.604 0.0660 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3971 on 1 degrees of freedom
## Multiple R-squared: 0.9893, Adjusted R-squared: 0.9786
## F-statistic: 92.24 on 1 and 1 DF, p-value: 0.06605
dfslope[dfslope$ID=="3_S427*",]$slope=- 1.3484
dfslope[dfslope$ID=="3_S427*",]$R2= 0.9893
a=lm(formula=log_PFU_ml~Sampling_time_min,data=df[df$Genotype=="S427*" &df$Run==4,])
summary(a) #slope=-0.9737, R^2= 0.9814
##
## Call:
## lm(formula = log_PFU_ml ~ Sampling_time_min, data = df[df$Genotype ==
## "S427*" & df$Run == 4, ])
##
## Residuals:
## 31 32 33
## 0.1548 -0.3095 0.1548
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 15.0635 0.5791 26.011 0.0245 *
## Sampling_time_min -0.9737 0.1340 -7.265 0.0871 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3791 on 1 degrees of freedom
## Multiple R-squared: 0.9814, Adjusted R-squared: 0.9628
## F-statistic: 52.77 on 1 and 1 DF, p-value: 0.08709
dfslope[dfslope$ID=="4_S427*",]$slope=- 0.9737
dfslope[dfslope$ID=="4_S427*",]$R2= 0.9814
a=lm(formula=log_PFU_ml~Sampling_time_min,data=df[df$Genotype=="S427*" &df$Run==5,])
summary(a) #slope=-1.3133, R^2= 0.9513
##
## Call:
## lm(formula = log_PFU_ml ~ Sampling_time_min, data = df[df$Genotype ==
## "S427*" & df$Run == 5, ])
##
## Residuals:
## 34 35 36
## 0.3433 -0.6865 0.3433
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 16.5109 1.2844 12.855 0.0494 *
## Sampling_time_min -1.3133 0.2973 -4.418 0.1417
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8408 on 1 degrees of freedom
## Multiple R-squared: 0.9513, Adjusted R-squared: 0.9025
## F-statistic: 19.52 on 1 and 1 DF, p-value: 0.1417
dfslope[dfslope$ID=="5_S427*",]$slope=- 1.3133
dfslope[dfslope$ID=="5_S427*",]$R2= 0.9513
a=lm(formula=log_PFU_ml~Sampling_time_min,data=df[df$Genotype=="S427*" &df$Run==7,])
summary(a) #slope=-1.2369, R^2=0.9416
##
## Call:
## lm(formula = log_PFU_ml ~ Sampling_time_min, data = df[df$Genotype ==
## "S427*" & df$Run == 7, ])
##
## Residuals:
## 37 38 39
## 0.3557 -0.7115 0.3557
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 15.5624 1.3311 11.692 0.0543 .
## Sampling_time_min -1.2369 0.3081 -4.015 0.1554
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8714 on 1 degrees of freedom
## Multiple R-squared: 0.9416, Adjusted R-squared: 0.8832
## F-statistic: 16.12 on 1 and 1 DF, p-value: 0.1554
dfslope[dfslope$ID=="7_S427*",]$slope=- 1.2369
dfslope[dfslope$ID=="7_S427*",]$R2= 0.9416
# Each model represents an individual experimental run
a=lm(formula=log_PFU_ml~Sampling_time_min,data=df[df$Genotype=="T101A" &df$Run==5,])
summary(a) #slope=-0.28577, R^2= 0.9814
##
## Call:
## lm(formula = log_PFU_ml ~ Sampling_time_min, data = df[df$Genotype ==
## "T101A" & df$Run == 5, ])
##
## Residuals:
## 40 41 42
## 0.04540 -0.09081 0.04540
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 14.66372 0.16989 86.314 0.00738 **
## Sampling_time_min -0.28577 0.03932 -7.267 0.08705 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1112 on 1 degrees of freedom
## Multiple R-squared: 0.9814, Adjusted R-squared: 0.9628
## F-statistic: 52.82 on 1 and 1 DF, p-value: 0.08705
dfslope[dfslope$ID=="5_T101A",]$slope=- 0.28577
dfslope[dfslope$ID=="5_T101A",]$R2= 0.9814
a=lm(formula=log_PFU_ml~Sampling_time_min,data=df[df$Genotype=="T101A" &df$Run==6,])
summary(a) #slope=-0.34008, R^2= 0.993
##
## Call:
## lm(formula = log_PFU_ml ~ Sampling_time_min, data = df[df$Genotype ==
## "T101A" & df$Run == 6, ])
##
## Residuals:
## 43 44 45
## -0.03298 0.06595 -0.03298
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 14.94736 0.12339 121.14 0.00526 **
## Sampling_time_min -0.34008 0.02856 -11.91 0.05334 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.08078 on 1 degrees of freedom
## Multiple R-squared: 0.993, Adjusted R-squared: 0.986
## F-statistic: 141.8 on 1 and 1 DF, p-value: 0.05334
dfslope[dfslope$ID=="6_T101A",]$slope=- 0.34008
dfslope[dfslope$ID=="6_T101A",]$R2= 0.993
a=lm(formula=log_PFU_ml~Sampling_time_min,data=df[df$Genotype=="T101A" &df$Run==7,])
summary(a) #slope=-0.20034, R^2= 0.9875
##
## Call:
## lm(formula = log_PFU_ml ~ Sampling_time_min, data = df[df$Genotype ==
## "T101A" & df$Run == 7, ])
##
## Residuals:
## 46 47 48
## -0.02607 0.05214 -0.02607
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 14.68695 0.09755 150.562 0.00423 **
## Sampling_time_min -0.20034 0.02258 -8.873 0.07144 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.06386 on 1 degrees of freedom
## Multiple R-squared: 0.9875, Adjusted R-squared: 0.9749
## F-statistic: 78.74 on 1 and 1 DF, p-value: 0.07144
dfslope[dfslope$ID=="7_T101A",]$slope=- 0.20034
dfslope[dfslope$ID=="7_T101A",]$R2= 0.9875
summary(dfslope)
## Run Genotype CFU_ml ID slope
## 1:3 G322D:7 Min. :121000000 Length:23 Min. :-1.3753
## 2:3 S427*:6 1st Qu.:196000000 Class :character 1st Qu.:-1.3190
## 3:3 T101A:3 Median :221000000 Mode :character Median :-1.0299
## 4:3 WT :7 Mean :235217391 Mean :-1.0036
## 5:4 3rd Qu.:267000000 3rd Qu.:-0.8821
## 6:3 Max. :391000000 Max. :-0.2003
## 7:4
## R2
## Min. :0.8880
## 1st Qu.:0.9469
## Median :0.9814
## Mean :0.9679
## 3rd Qu.:0.9925
## Max. :1.0000
##
# min: 0.89
# max: 1.00
Formula for calculating k: k (ml^-1 min^-1)= -slope/Bacteria concentration (CFU_ml)
# Calculate adsorption rate constant k
dfslope$k=-(dfslope$slope)/(dfslope$CFU_ml)
#write.csv(dfslope,"experiment_3_slope_analysis.csv")
## WT
mean(dfslope[dfslope$Genotype=="WT",]$k) #4.146119e-09
## [1] 4.146119e-09
std.error(dfslope[dfslope$Genotype=="WT",]$k) #5.982199e-10
## [1] 5.982199e-10
## T101A
mean(dfslope[dfslope$Genotype=="T101A",]$k) # 9.874041e-10
## [1] 9.874041e-10
std.error(dfslope[dfslope$Genotype=="T101A",]$k) #6.043767e-11
## [1] 6.043767e-11
## G322D
mean(dfslope[dfslope$Genotype=="G322D",]$k) # 6.102084e-09
## [1] 6.102084e-09
std.error(dfslope[dfslope$Genotype=="G322D",]$k) #9.289692e-10
## [1] 9.289692e-10
## S427*
mean(dfslope[dfslope$Genotype=="S427*",]$k) #5.931108e-09
## [1] 5.931108e-09
std.error(dfslope[dfslope$Genotype=="S427*",]$k) #5.968958e-10
## [1] 5.968958e-10
# Test for normal distribution
shapiro.test(dfslope$k) # passed: W = 0.9632, p-value = 0.5309
##
## Shapiro-Wilk normality test
##
## data: dfslope$k
## W = 0.9632, p-value = 0.5309
# Homogeneity of variances
leveneTest(k ~ Genotype, data = dfslope) # passed: group DF= 3, group sample= 19, F-value= 1.2369 p=0.324
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 1.2369 0.324
## 19
# Linear mixed effects model with Genotype as fixed effect and run as random effect
## Full model
m1<- lmer(k~Genotype +(1|Run) ,data= dfslope )
## Null model
m2<- lmer(k~1 +(1|Run),data=dfslope)
# Model comparison using anova to test if including genotype into the model significantly contributes to explaining adsorption constant variation
anova(m1,m2)
## refitting model(s) with ML (instead of REML)
## Data: dfslope
## Models:
## m2: k ~ 1 + (1 | Run)
## m1: k ~ Genotype + (1 | Run)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## m2 3 -847.21 -843.80 426.60 -853.21
## m1 6 -870.97 -864.16 441.49 -882.97 29.764 3 1.548e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Results of Anova Analysis of Variance Table Models: m2: k ~
1 + (1 | Run) m1: k ~ Genotype + (1 | Run) npar AIC BIC logLik deviance
Chisq Df Pr(>Chisq)
m2 3 -847.21 -843.80 426.60 -853.21
m1 6 -870.97 -864.16 441.49 -882.97 29.764 3 1.548e-06 ***
summary(m1)
## Linear mixed model fit by REML ['lmerMod']
## Formula: k ~ Genotype + (1 | Run)
## Data: dfslope
##
## REML criterion at convergence: -718.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.23170 -0.50637 0.04028 0.48679 1.95181
##
## Random effects:
## Groups Name Variance Std.Dev.
## Run (Intercept) 2.822e-18 1.680e-09
## Residual 6.623e-19 8.138e-10
## Number of obs: 23, groups: Run, 7
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 6.102e-09 7.055e-10 8.649
## GenotypeS427* -4.454e-10 4.586e-10 -0.971
## GenotypeT101A -4.292e-09 5.983e-10 -7.173
## GenotypeWT -1.956e-09 4.350e-10 -4.496
##
## Correlation of Fixed Effects:
## (Intr) GS427* GT101A
## GentypS427* -0.292
## GentypT101A -0.224 0.313
## GenotypeWT -0.308 0.474 0.364
Results of Model m1 summary Linear mixed model fit by REML [‘lmerMod’] Formula: k ~ Genotype + (1 | Run) Data: dfslope
REML criterion at convergence: -718.6
Scaled residuals: Min 1Q Median 3Q Max -1.23170 -0.50637 0.04028 0.48679 1.95181
Random effects: Groups Name Variance Std.Dev. Run (Intercept) 2.822e-18 1.680e-09 Residual 6.623e-19 8.138e-10 Number of obs: 23, groups: Run, 7
Fixed effects: Estimate Std. Error t value (Intercept) 6.102e-09 7.055e-10 8.649 GenotypeS427* -4.454e-10 4.586e-10 -0.971 GenotypeT101A -4.292e-09 5.983e-10 -7.173 GenotypeWT -1.956e-09 4.350e-10 -4.496
Correlation of Fixed Effects: (Intr) GS427* GT101A GentypS427*
-0.292
GentypT101A -0.224 0.313
GenotypeWT -0.308 0.474 0.364
# Using Tukey-HSD as Post-Hoc test for mutliple comparison
summary(glht(m1,linfct=mcp(Genotype="Tukey")),test=adjusted("holm"))
##
## Simultaneous Tests for General Linear Hypotheses
##
## Multiple Comparisons of Means: Tukey Contrasts
##
##
## Fit: lmer(formula = k ~ Genotype + (1 | Run), data = dfslope)
##
## Linear Hypotheses:
## Estimate Std. Error z value Pr(>|z|)
## S427* - G322D == 0 -4.454e-10 4.586e-10 -0.971 0.331457
## T101A - G322D == 0 -4.292e-09 5.983e-10 -7.173 4.40e-12 ***
## WT - G322D == 0 -1.956e-09 4.350e-10 -4.496 2.76e-05 ***
## T101A - S427* == 0 -3.846e-09 6.296e-10 -6.109 5.02e-09 ***
## WT - S427* == 0 -1.511e-09 4.586e-10 -3.294 0.001977 **
## WT - T101A == 0 2.336e-09 5.983e-10 3.904 0.000284 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- holm method)
Results of Post-Hoc Multiple comparison testing Simultaneous Tests for General Linear Hypotheses
Multiple Comparisons of Means: Tukey Contrasts
Fit: lmer(formula = k ~ Genotype + (1 | Run), data = dfslope)
Linear Hypotheses: Estimate Std. Error z value Pr(>|z|)
S427* - G322D == 0 -4.454e-10 4.586e-10 -0.971 0.331457
T101A - G322D == 0 -4.292e-09 5.983e-10 -7.173 4.40e-12 WT
- G322D == 0 -1.956e-09 4.350e-10 -4.496 2.76e-05 T101A -
S427* == 0 -3.846e-09 6.296e-10 -6.109 5.02e-09 WT -
S427 == 0 -1.511e-09 4.586e-10 -3.294 0.001977 WT - T101A
== 0 2.336e-09 5.983e-10 3.904 0.000284 ***
Signif. codes: 0 ‘’ 0.001 ‘’ 0.01 ‘’ 0.05 ‘.’ 0.1 ‘ ’ 1 (Adjusted p values reported – holm method)
#output_file <- "adsorption_statistics_model_checking.jpeg"
#jpeg(filename = output_file, width = 8, height = 4, units = "in", res = 600)
## Plot layout
par(mfrow=c(1,2))
## qqplot
qqPlot(residuals(m1))
## 4 16
## 2 6
## residuals vs fitted values plot
plot(residuals(m1)~fitted(m1))
abline(a=0, b=0, col="red")
#dev.off()
(Figure Fig. 3A)
# Rearrange order of genotypes in ascending order
dfslope$Genotype <- factor(dfslope$Genotype , levels=c("T101A", "WT","S427*", "G322D"))
# visualize results of adsorption rate constant
a=ggplot(dfslope,aes(Genotype,k,fill=as.factor(Genotype)))+
geom_boxplot(alpha=0.8)+ #boxplot
geom_point()+ # individual adsorption constants as dots
annotate(geom="text", x=1.2, y=1.15e-09, label="a",
color="black", size=5)+ # significance letter F(T101A)
annotate(geom="text", x=2.2, y=5.25e-09, label="b",
color="black", size=5)+ # significance letter Ancestor
annotate(geom="text", x=3.2, y=6.8e-09, label="c",
color="black", size=5)+ # significance letter F(S427*)
annotate(geom="text", x=4.2, y=7.1e-09, label="c",
color="black", size=5)+ # significance letter F(G322D)
scale_y_continuous(trans="log10",
labels=trans_format("log10", math_format(10^.x)),
breaks=c(1e-09,1e-08))+ # log transformation of y-axis
scale_x_discrete(breaks=c("T101A","WT","S427*","G322D"),
labels=c("F(T101A)","Ancestor","F(S427*)","F(G322D)"))+ # change label of mutants on the x-axis
scale_fill_manual(values=c("#ae017e","#29485d","#ef7c00","#c6d325"))+ # color setting
labs(x = "Genotype",
y = expression(paste("Adsorption constant (ml"^"-1","min"^"-1 ",")")))+ #labels of x and y axis
theme_bw()+ # plot layout
theme(legend.position = "none")+ # no legend
theme(axis.text=element_text(size=12),
axis.title=element_text(size=14)) # font size
a
# Save plot at 600 dpi resolution
#ggsave("adsorption.jpeg", plot = a, dpi = 600, width = 8, height = 6)
Code used to generate Fig. 3B and Fig. S4 in the manuscript
READ ME Specification of columns in data set
Block: Experimental block during which the phage isolate was evolved - 0: wild type non-evolved - A: 30-minute transfer experiment - 1: 3-hour transfer experiment
Phenotype: Lysis plaque phenotype forming large plaques (LP) or small plaques (SP)
Line: Selection line phage was evolved in - WT: wild type non-evolved - L3: 30-minute transfer experiment (corresponding to genotype F(T101A)) - L1: 3-hour transfer experiment (corresponding to genotypes F(G322D) [L1]) L4: 3-hour transfer experiment (corresponding to F(S427*) [L4])
Image: Replicate image of individual plaque assay plate, for each phage isolate, four images labelled A to D were taken (corresponding to four independent replicates)
Colony: Individual ID of a lysis plaque on a plaque assay plate selected from the segmentation. Plaques were manually selected, checking for correct recognition of the mask (i.e. intact area, non-connected, individual plaques)
Area: Lysis plaque area in cm^2
Distance: Distance to the plaque periphery in pixels (px)
Intensity: Plaque intensity in pixels (px) at individual distance position within the plaque
Intensity_SD: Standard deviation of pixel intensity
ID: Unique identifier combining block, phenotype, and line
df=read.csv("experiment_4_plaque_image_analysis.csv")
# df consists of 52687 observations and 10 variables
str(df) # structure of columns
## 'data.frame': 52687 obs. of 10 variables:
## $ Block : chr "0" "0" "0" "0" ...
## $ Phenotype : chr "LP" "LP" "LP" "LP" ...
## $ Line : chr "WT" "WT" "WT" "WT" ...
## $ Image : chr "A" "A" "A" "A" ...
## $ Colony : int 8 8 8 8 8 8 8 8 8 8 ...
## $ Area : num 0.555 0.555 0.555 0.555 0.555 ...
## $ Distance : num 1 1.41 2 2.24 2.83 ...
## $ Intensity : num 100 97.2 95.5 94.6 94 ...
## $ Intensity_SD: num 3.1 1.99 1.81 1.65 1.4 ...
## $ ID : chr "0_LP_WT" "0_LP_WT" "0_LP_WT" "0_LP_WT" ...
summary(df) #summary of columns
## Block Phenotype Line Image
## Length:52687 Length:52687 Length:52687 Length:52687
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## Colony Area Distance Intensity
## Min. : 3.0 Min. :0.001384 Min. : 1.000 Min. : 60.00
## 1st Qu.: 19.0 1st Qu.:0.071833 1st Qu.: 5.831 1st Qu.: 76.75
## Median : 43.0 Median :0.294716 Median :10.296 Median : 83.00
## Mean :102.5 Mean :0.334197 Mean :12.869 Mean : 85.62
## 3rd Qu.:117.0 3rd Qu.:0.534672 3rd Qu.:19.105 3rd Qu.: 92.50
## Max. :621.0 Max. :1.524953 Max. :45.880 Max. :154.25
## Intensity_SD ID
## Min. : 0.0000 Length:52687
## 1st Qu.: 0.9428 Class :character
## Median : 1.7916 Mode :character
## Mean : 2.1207
## 3rd Qu.: 2.8986
## Max. :46.1976
Code used to generate Fig. 3B
Each colony id corresponds to one measured plaques. Multiple intensity and distance values exist for each colony id but only one area value. To visualize plaque areas, df has to be summarized according to area.
# Make a copy of the df to work with for the area analysis
df1=df
# Subset df1 used for area analysis by removing columns "Distance", "Intensity", and "Intensity_SD"
df1=df1[,-c(7:9)]
# Create unique identifier per colony & image
df1$ColID=paste(df1$ID,df1$Colony,df1$Image,sep="_")
# Summarize data to that only one area per colony id exists
df1=df1 %>%
group_by(ColID) %>%
summarise(across(c(Area),mean))
# Seperate unique identifier column "ColID" into individual columnse
df1=separate(df1,col=ColID,into=c("Block","Phenotype","Line","Colony", "Image"),sep="_")
# Convert Phenotype and Line into factors
df1$Phenotype=as.factor(df1$Phenotype)
df1$Line=as.factor(df1$Line)
# Create unique identifier "ID" combining Block, Phenotype, and Line
df1$ID=paste(df1$Block,df1$Phenotype,df1$Line,sep="_")
df1$ID=as.factor(df1$ID)
# Add a column with genotype information
df1$Genotype=NA
# Add genotypes
df1[df1$ID=="0_LP_WT",]$Genotype="WT"
df1[df1$ID=="1_SP_L1",]$Genotype="G322D"
df1[df1$ID=="1_SP_L4",]$Genotype="S427*"
df1[df1$ID=="A_LP_L3",]$Genotype="T101A"
# Convert Genotype into factor
df1$Genotype=as.factor(df1$Genotype)
# Rearrange Genotypes based on plaque size in decenting order
df1$Genotype <- factor(df1$Genotype , levels=c("T101A", "WT","S427*", "G322D"))
# Visualize plaque area using a combined box and violin plot (Fig. 3B)
a=ggplot(df1,aes(Genotype,Area, fill=Genotype))+
geom_violin(alpha=0.8)+ # violin plot
geom_boxplot(width=0.1, fill="white", outlier.shape=NA)+ # boxplot
scale_y_continuous(trans="log10",
labels=trans_format("log10", math_format(10^.x)),
breaks=c(1e-03,1e-02,1e-01,1e+00))+ # log scale with exponential y-ticks
scale_x_discrete(breaks=c("T101A","WT","S427*","G322D"),
labels=c("F(T101A)","Ancestor","F(S427*)","F(G322D)"))+ # Change x labels
scale_fill_manual(values=c("#ae017e","#29485d","#ef7c00","#c6d325"))+ #setting colors of violin plot
labs(x = "Genotype",
y = expression(paste("Plaque area (cm"^"2",")")))+ # labels of x and y axis
theme_bw()+ # plot layout
theme(legend.position = "none")+ # remove figure legend
theme(axis.text=element_text(size=12),
axis.title=element_text(size=14)) # set font size
a
# Save plot at high resolution resolution
#ggsave("plaque_area.jpeg", plot = a, dpi = 600, width = 8, height = 6)
# Summary of total number of plaques analyzed for each genotype
summary(df1)
## Block Phenotype Line Colony Image
## Length:870 LP:136 L1:303 Length:870 Length:870
## Class :character SP:734 L3: 60 Class :character Class :character
## Mode :character L4:431 Mode :character Mode :character
## WT: 76
##
##
## Area ID Genotype
## Min. :0.001384 0_LP_WT: 76 T101A: 60
## 1st Qu.:0.028572 1_SP_L1:303 WT : 76
## Median :0.051452 1_SP_L4:431 S427*:431
## Mean :0.112703 A_LP_L3: 60 G322D:303
## 3rd Qu.:0.095175
## Max. :1.524953
# F(G322D): 303
# F(S427*): 431
# F(T101A): 60
# ancestor: 76
# Calculate mean and standard error for plaque area of each genotype
## Ancestor
mean(df1[df1$Genotype=="WT",]$Area) # 0.375
## [1] 0.3749175
std.error(df1[df1$Genotype=="WT",]$Area) # 0.021
## [1] 0.02140839
## F(T101A)
mean(df1[df1$Genotype=="T101A",]$Area) #0.518
## [1] 0.5184235
std.error(df1[df1$Genotype=="T101A",]$Area) # 0.039
## [1] 0.0391119
## F(G322D)
mean(df1[df1$Genotype=="G322D",]$Area) # 0.033
## [1] 0.03278611
std.error(df1[df1$Genotype=="G322D",]$Area) # 0.001
## [1] 0.001302967
## F(S427*)
mean(df1[df1$Genotype=="S427*",]$Area) #0.066
## [1] 0.06616785
std.error(df1[df1$Genotype=="S427*",]$Area) #0.002
## [1] 0.00189727
Code for supplementary figure Fig. S4
# Make a copy of df
df2=df
# Create unique identifier per colony & image
df2$ColID=paste(df2$ID,df2$Colony,df2$Image,sep="_")
# Summarize data to that only one area per colony id exists
df2=df2 %>%
group_by(ColID) %>%
summarise(across(c(Area, Intensity),mean))
# Seperate unique identifier column "ColID" into individual columnse
df2=separate(df2,col=ColID,into=c("Block","Phenotype","Line","Colony", "Image"),sep="_")
# Convert Phenotype and Line into factors
df2$Phenotype=as.factor(df2$Phenotype)
df2$Line=as.factor(df2$Line)
# Create unique identifier "ID" combining Block, Phenotype, and Line
df2$ID=paste(df2$Block,df2$Phenotype,df2$Line,sep="_")
df2$ID=as.factor(df2$ID)
# Add a column with genotype information
df2$Genotype=NA
# Add genotypes
df2[df2$ID=="0_LP_WT",]$Genotype="Ancestor"
df2[df2$ID=="1_SP_L1",]$Genotype="F(G322D)"
df2[df2$ID=="1_SP_L4",]$Genotype="F(S427*)"
df2[df2$ID=="A_LP_L3",]$Genotype="F(T101A)"
# Convert Genotype into factor
df2$Genotype=as.factor(df2$Genotype)
# Set levels of genotypes form highest to lowest turbidty
df2$Genotype <- factor(df2$Genotype , levels=c("F(G322D)", "F(S427*)","F(T101A)", "Ancestor"))
# Plot results using a density plot
x=ggplot(df2,aes(x=Area,y=Intensity,color=Genotype)) +
geom_density_2d()+ # density plot with concentric areas
scale_x_continuous(trans="log10")+ # log transform area axis
scale_color_manual(values=c("#c6d325","#ef7c00","#ae017e","#29485d"))+ # set color palette
labs(x = expression(paste(" Plaque area (cm"^"2 ",")")),
y="Turbidity")+ # labels of x- and y-axis
theme_bw()+ # plot layout
theme(legend.position = "bottom")+ # legend position
theme(axis.text=element_text(size=12),
axis.title=element_text(size=14)) # font size
x
# Save image at high resolution as jpeg file
ggsave("turbidity.jpeg", plot = x, dpi = 600, width = 8, height = 6)
Code used to generate Fig. 4D in the manuscript
READ ME Specification of columns in data set
-Treatment: Treatment Filtration: Samples were sterile filtered using a 0.2 µm filter to remove bacteria and phages still inside bacterial hosts
-Transfer: Transfer identity: Transfers T1, T2, T3, and T4 were individual 30-minute transfers. For transfer T1, phage lysate was used to inoculate bacterial hosts at an MOI input of ~0.1. For the three consecutive transfers, 100 µl of phage-bacteria mix was transferred onto fresh, susceptible ancestral bacteria cells. The same protocol was used in Experiment 2: 30-min transfer regime.
-Line: Genotype used to initiate the transfer - WT: ancestral wild-type phage genotype, large plaque former - T101A: mutant T101A forming large plaques, evolved during 30-minute transfers in Experiment 2: 30-min transfer regime.
-CFU: Colony-forming units of the ancestral E. coli C culture in exponential phase at the moment of infection initiation
-Dilution_CFU: Plating dilution used to plate CFUs; 100 µl of this dilution was plated for colony counts unless stated otherwise.
-CFU_ml: Colony-forming units in ml, calculated by CFUCFU_dilution10 (because 100 µl were plated)
-PFU_LP_1: Plaque-forming units of the large-plaque phenotype for each sample of replicate plating 1
-Dilution_1: Plating dilution used for PFU counts, 100 µl of plating dilution was plated for plaque counting of replicate plating 1
-PFU_LP_2: Plaque-forming units of the large-plaque phenotype for each sample of replicate plating 2
-Dilution_2: Plating dilution used for PFU counts, 100 µl of plating dilution was plated for plaque counting of replicate plating 2
-PFU_LP_3: Plaque-forming units of the large-plaque phenotype for each sample of replicate plating 3
-Dilution_3: Plating dilution used for PFU counts, 100 µl of plating dilution was plated for plaque counting of replicate plating 3
-PFU_ml_LP1: Plaque-forming units per ml of large plaque phenotype calculated as PFUdilution_110 (because 100 µl were plated) of replicate plating 1
-PFU_ml_LP2: Plaque-forming units per ml of large plaque phenotype calculated as PFUdilution_110 (because 100 µl were plated) of replicate plating 2
-PFU_ml_LP3: Plaque-forming units per ml of large plaque phenotype calculated as PFUdilution_110 (because 100 µl were plated) of replicate plating 3
-Median_PFU_ml: Median large plaque PFU per ml calculated based on phage concentrations of the three replicate platings
-Median_WT: Median of titering for the wild type taken from column “Median_PFU_ml” used to calculate the ratio between free phages of mutant F(T101A) and the ancestor
df=read.csv("experiment_5_30min_free_phages.csv")
# df has 24 observations and 18 variables
str(df) # structure of data columns
## 'data.frame': 24 obs. of 18 variables:
## $ Replicate : int 1 1 1 1 1 1 1 1 2 2 ...
## $ Treatment : chr "Filtration" "Filtration" "Filtration" "Filtration" ...
## $ Transfer : chr "T1" "T2" "T3" "T4" ...
## $ Line : chr "T101A" "T101A" "T101A" "T101A" ...
## $ CFU : int 148 136 78 135 148 136 78 135 75 73 ...
## $ Dilution_CFU : num 1e+05 1e+05 1e+05 1e+05 1e+05 1e+05 1e+05 1e+05 1e+05 1e+05 ...
## $ CFU_ml : num 1.48e+08 1.36e+08 7.80e+07 1.35e+08 1.48e+08 1.36e+08 7.80e+07 1.35e+08 7.50e+07 7.30e+07 ...
## $ PFU_LP_1 : int 14 26 194 139 26 97 168 145 53 170 ...
## $ Dilution_1 : num 1e+06 1e+07 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+07 ...
## $ PFU_LP_2 : int 70 28 23 95 60 96 15 78 49 113 ...
## $ Dilution_2 : num 1e+05 1e+07 1e+07 1e+06 1e+05 1e+06 1e+07 1e+06 1e+06 1e+07 ...
## $ PFU_LP_3 : int 80 26 21 95 20 80 60 47 52 96 ...
## $ Dilution_3 : num 1e+05 1e+07 1e+07 1e+06 1e+05 1e+06 1e+06 1e+06 1e+06 1e+07 ...
## $ PFU_ml1 : num 1.40e+08 2.60e+09 1.94e+09 1.39e+09 2.60e+08 ...
## $ PFU_ml2 : num 7.0e+07 2.8e+09 2.3e+09 9.5e+08 6.0e+07 ...
## $ PFU_ml3 : num 8.0e+07 2.6e+09 2.1e+09 9.5e+08 2.0e+07 8.0e+08 6.0e+08 4.7e+08 5.2e+08 9.6e+09 ...
## $ Median_PFU_ml: num 8.0e+07 2.6e+09 2.1e+09 9.5e+08 6.0e+07 ...
## $ Median_WT : num 6.0e+07 9.6e+08 1.5e+09 7.8e+08 6.0e+07 9.6e+08 1.5e+09 7.8e+08 2.9e+08 7.3e+09 ...
summary(df) # summary of data columns
## Replicate Treatment Transfer Line
## Min. :1 Length:24 Length:24 Length:24
## 1st Qu.:1 Class :character Class :character Class :character
## Median :2 Mode :character Mode :character Mode :character
## Mean :2
## 3rd Qu.:3
## Max. :3
## CFU Dilution_CFU CFU_ml PFU_LP_1
## Min. : 73.0 Min. :1e+05 Min. : 73000000 Min. : 14.00
## 1st Qu.: 96.0 1st Qu.:1e+05 1st Qu.: 96000000 1st Qu.: 25.25
## Median :135.5 Median :1e+05 Median :135500000 Median : 43.50
## Mean :150.6 Mean :1e+05 Mean :150583333 Mean : 72.12
## 3rd Qu.:150.8 3rd Qu.:1e+05 3rd Qu.:150750000 3rd Qu.:109.00
## Max. :354.0 Max. :1e+05 Max. :354000000 Max. :194.00
## Dilution_1 PFU_LP_2 Dilution_2 PFU_LP_3
## Min. : 1000000 Min. : 15.00 Min. :1.00e+05 Min. : 15.00
## 1st Qu.: 1000000 1st Qu.: 22.75 1st Qu.:1.00e+06 1st Qu.: 25.25
## Median : 10000000 Median : 45.50 Median :1.00e+07 Median : 45.00
## Mean : 17125000 Mean : 50.96 Mean :1.78e+07 Mean : 52.54
## 3rd Qu.: 10000000 3rd Qu.: 75.00 3rd Qu.:1.00e+07 3rd Qu.: 80.00
## Max. :100000000 Max. :113.00 Max. :1.00e+08 Max. :128.00
## Dilution_3 PFU_ml1 PFU_ml2
## Min. : 100000 Min. :1.400e+08 Min. :6.000e+07
## 1st Qu.: 1000000 1st Qu.:8.600e+08 1st Qu.:7.075e+08
## Median : 10000000 Median :2.270e+09 Median :2.550e+09
## Mean : 17425000 Mean :5.562e+09 Mean :5.272e+09
## 3rd Qu.: 10000000 3rd Qu.:8.100e+09 3rd Qu.:7.625e+09
## Max. :100000000 Max. :1.900e+10 Max. :2.200e+10
## PFU_ml3 Median_PFU_ml Median_WT
## Min. :2.000e+07 Min. :6.000e+07 Min. :6.000e+07
## 1st Qu.:5.075e+08 1st Qu.:7.150e+08 1st Qu.:6.575e+08
## Median :2.650e+09 Median :2.350e+09 Median :1.800e+09
## Mean :5.344e+09 Mean :5.366e+09 Mean :3.560e+09
## 3rd Qu.:9.375e+09 3rd Qu.:8.700e+09 3rd Qu.:5.425e+09
## Max. :2.300e+10 Max. :2.200e+10 Max. :1.280e+10
# Turn line into a factor
df$Line=as.factor(df$Line)
df$ratio=df$Median_PFU_ml/df$Median_WT
# for WT, ratio should be 1, check if this is true
summary(df[df$Line=="WT",]$ratio)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1 1 1 1 1 1
# Remove all WT (ratio value=1)
df1=df[df$Line!="WT",]
# Create a column for ratio mean and se
df1$mean=NA
df1$st.error=NA
# Mean and standard error of all transfer T1 for three replicates
mean(df1[df1$Transfer=="T1",]$ratio) #1.51
## [1] 1.505914
std.error(df1[df1$Transfer=="T1",]$ratio) #0.14
## [1] 0.1445667
## add values to table
df1[df1$Transfer=="T1",]$mean=1.51
df1[df1$Transfer=="T1",]$st.error=0.14
# Mean and standard error of all transfer T2 for three replicates
mean(df1[df1$Transfer=="T2",]$ratio) #1.99
## [1] 1.991676
std.error(df1[df1$Transfer=="T2",]$ratio) #0.36
## [1] 0.3617051
## add values to table
df1[df1$Transfer=="T2",]$mean=1.99
df1[df1$Transfer=="T2",]$st.error=0.36
# Mean and standard error of all transfer T3 for three replicates
mean(df1[df1$Transfer=="T3",]$ratio) #1.84
## [1] 1.836898
std.error(df1[df1$Transfer=="T3",]$ratio) #0.22
## [1] 0.2184878
## add values to table
df1[df1$Transfer=="T3",]$mean=1.84
df1[df1$Transfer=="T3",]$st.error=0.22
# Mean and standard error of all transfer T4 for three replicates
mean(df1[df1$Transfer=="T4",]$ratio) #2.47
## [1] 2.477411
std.error(df1[df1$Transfer=="T4",]$ratio) #0.88
## [1] 0.8779285
## add values to table
df1[df1$Transfer=="T4",]$mean=2.47
df1[df1$Transfer=="T4",]$st.error=0.88
# Add transfer as individual number
df1$no_transfer=NA
df1[df1$Transfer=='T1',]$no_transfer=1
df1[df1$Transfer=='T2',]$no_transfer=2
df1[df1$Transfer=='T3',]$no_transfer=3
df1[df1$Transfer=='T4',]$no_transfer=4
# Visualize phage ratios compared to wild type in a bar plot (Fig. 4D)
a=ggplot(df1,aes(no_transfer, mean,fill=Line))+
geom_bar(stat="identity", position=position_dodge())+ # bar chart vertical
geom_errorbar(aes(ymin=mean-st.error, ymax=mean+st.error), width=.2,
position=position_dodge(.9))+ # add error bars
scale_fill_manual(values=c("#ae017e"))+ # set colors of bars
geom_hline(yintercept=1,linetype="dashed", color="black")+ # horizontal line at 1 (indicating ratio frequency of wild type )
xlab("Transfer")+ # label x-axis
ylab("Relative free phage particles after 30 mins")+ # label y-axis
theme_bw()+ # plot layout
theme(legend.position = "none")+
theme(axis.text=element_text(size=12),
axis.title=element_text(size=14)) # remove legend and set font size
a
# Save plot at high resolution (Fig 4 D)
#ggsave("free_phages.jpeg", plot = a, dpi = 600, width = 8, height = 6)
Code used to generate Fig. 5B and Fig. S6 in the manuscript
READ ME Specification of columns in data set
Sampling_time_min: Sampling time in minutes, phages were extracted by chloroform extraction
Genotype: Genotypes of phage isolates - WT: wild-type large plaque former - T101A: large plaque mutant evolved from WT using 30-min transfers - G322D: small plaque mutant evolved from WT using 3 hour transfers - S427*: small plaque mutant evolved from WT using 3 hour transfers
CFU: colony-forming units of E. coli C exponential phase culture at the moment of phage infection
Dilution_CFU: Plating dilution used to plate CFUs; 100 µl of this dilution was plated for colony counts unless stated otherwise.
CFU_ml: Colony-forming units per ml calculated by CFUCFU_dilution10; factor *10 because 100 µl of CFU_dilution was plated
PFU: Plaque-forming units
Dilution_PFU: Plating dilution used for PFU counts, 100 µl of plating dilution was plated for plaque counting
PFU_ml: Plaque-forming units per ml calculated by PFUPFU_dilution10; factor *10 because 100 µl of PFU_dilution was plated
df=read.csv("experiment_6_decay_analysis.csv")
# df contains 40 observations of 8 variables
str(df) # structure of columns
## 'data.frame': 40 obs. of 8 variables:
## $ Sampling_time_min: int 30 30 30 30 60 60 60 60 90 90 ...
## $ Genotype : chr "WT" "G322D" "S427*" "T101A" ...
## $ CFU : int 108 108 108 108 108 108 108 108 108 108 ...
## $ Dilution_CFU : num 1e+05 1e+05 1e+05 1e+05 1e+05 1e+05 1e+05 1e+05 1e+05 1e+05 ...
## $ CFU_ml : num 1.08e+08 1.08e+08 1.08e+08 1.08e+08 1.08e+08 1.08e+08 1.08e+08 1.08e+08 1.08e+08 1.08e+08 ...
## $ PFU : int 62 72 245 101 81 128 138 48 40 147 ...
## $ Dilution_PFU : num 1e+07 1e+07 1e+07 1e+06 1e+08 1e+08 1e+08 1e+08 1e+08 1e+08 ...
## $ PFU_ml : num 6.20e+09 7.20e+09 2.45e+10 1.01e+09 8.10e+10 ...
summary(df) #summary of columns
## Sampling_time_min Genotype CFU Dilution_CFU
## Min. : 30 Length:40 Min. :108 Min. :1e+05
## 1st Qu.: 90 Class :character 1st Qu.:108 1st Qu.:1e+05
## Median :165 Mode :character Median :108 Median :1e+05
## Mean :165 Mean :108 Mean :1e+05
## 3rd Qu.:240 3rd Qu.:108 3rd Qu.:1e+05
## Max. :300 Max. :108 Max. :1e+05
## CFU_ml PFU Dilution_PFU PFU_ml
## Min. :1.08e+08 Min. : 15.00 Min. : 1000000 Min. :1.010e+09
## 1st Qu.:1.08e+08 1st Qu.: 48.00 1st Qu.: 10000000 1st Qu.:5.975e+09
## Median :1.08e+08 Median : 83.50 Median :100000000 Median :4.400e+10
## Mean :1.08e+08 Mean : 97.25 Mean : 65350000 Mean :6.710e+10
## 3rd Qu.:1.08e+08 3rd Qu.:139.25 3rd Qu.:100000000 3rd Qu.:1.190e+11
## Max. :1.08e+08 Max. :245.00 Max. :100000000 Max. :2.230e+11
# Convert genotype into factor
df$Genotype=as.factor(df$Genotype)
# Add plaque phenotype column to the df frame
df$Phenotype=NA
df[df$Genotype=="WT",]$Phenotype="LP"
df[df$Genotype=="T101A",]$Phenotype="LP"
df[df$Genotype=="G322D",]$Phenotype="SP"
df[df$Genotype=="S427*",]$Phenotype="SP"
# Convert phenotype into a factor
df$Phenotype=as.factor(df$Phenotype)
# Add Genotypes as Labels including the protein in which mutation occurs
df$Label=NA
df[df$Genotype=="G322D",]$Label="F(G322D)"
df[df$Genotype=="S427*",]$Label="F(S427*)"
df[df$Genotype=="T101A",]$Label="F(T101A)"
df[df$Genotype=="WT",]$Label="Ancestor"
# Convert Label into factor
df$Label=as.factor(df$Label)
(Supplementary Figure S6)
# Rearrange Label to the corresponding color code
df$Label <- factor(df$Label , levels=c("F(G322D)", "F(S427*)","F(T101A)", "Ancestor"))
# Plotting population dynamics throughout infection period
x=ggplot(df,aes(Sampling_time_min,PFU_ml,group=Label, color=Label))+
geom_point(size=3)+ # dotplot
geom_smooth(method="loess", se=F, linetype="dashed")+ #model fit loess
scale_y_continuous(trans="log10",
labels=trans_format("log10", math_format(10^.x)),
breaks=c(1e+09,1e+10,1e+11) )+ #y-axis log-transformation
scale_color_manual(values=c("#c6d325","#ef7c00","#ae017e","#252525"))+ # setting colors
xlab("Infection period (min)")+ # label x-axis
ylab("Infectious phage particles per ml")+ # label y-axis
theme_bw()+ # plot layout
theme(legend.position = "bottom")+ # legend position
theme(axis.text=element_text(size=12),
axis.title=element_text(size=14)) # font size
x
## `geom_smooth()` using formula = 'y ~ x'
# Save plot as high resolution jpeg file
# ggsave("supplementary_pop_dyn.jpeg", plot = x, dpi = 600, width = 8, height = 6)
(Figure Fig 5B)
# Determine peak titer for each genotype and the time point of the peak
df$Peak=NA
df[df$Genotype=="WT",]$Peak=8.10e+10 # peak at 60 min p.i.
df[df$Genotype=="T101A",]$Peak=4.80e+10 #peak at 60 min p.i
df[df$Genotype=="G322D",]$Peak=1.83e+11 # peak at 120 min p.i.
df[df$Genotype=="S427*",]$Peak=2.23e+11 # peak at 120 min p.i.
# Calculate ratio between each time step and peak
df$Ratio=df$PFU_ml/df$Peak
# Create unique identifier combining sampling time and genotype
df$ID=paste(df$Sampling_time_min,df$Genotype,sep="_")
# Plot growth and decay ratios using a dot plot
ggplot(df,aes(Sampling_time_min,Ratio,group=Genotype,color=Genotype))+
geom_point(size=3)+ # dot plot
scale_color_manual(values=c("#c6d325","#ef7c00","#ae017e","#252525"))+ # set color palette
xlab("Infection period (min)")+ # label x-axis
ylab("Ratio compared to titer peak")+ # label y-axis
theme_bw()+ # plot layout
theme(legend.position = "bottom") # legend position
# Remove time points between start and peak phage titers to only capture decay period
data1=df[df$Sampling_time_min!=30,]
data1=data1[data1$ID!="60_G322D",]
data1=data1[data1$ID!="60_S427*",]
data1=data1[data1$ID!="90_G322D",]
data1=data1[data1$ID!="90_S427*",]
# Plot ratios during the decay period
ggplot(data1[data1$Sampling_time_min<310,],aes(Sampling_time_min,Ratio,group=Genotype,color=Genotype))+
geom_point(size=2.5)+ # dot plot
scale_color_manual(values=c("#c6d325","#ef7c00","#ae017e","#252525"))+ # set color palette
xlab("Infection period (min)")+ # label x-axis
ylab("Remaining fraction of infectious phage particles")+ # label y-axis
theme_bw()+ # plot layout
theme(legend.position = "bottom") # legend position
# ==> Phage decay seems to follow an exponential decay function
# Plot WT decay using a dotplot
ggplot(data1[data1$Genotype=="WT",], aes(Sampling_time_min,Ratio))+
geom_point()
# x and y vector extracted from the WT dataset for model fitting
x1=data1[data1$Genotype=="WT",]$Sampling_time_min
y1=data1[data1$Genotype=="WT",]$Ratio
# Compare Linear model with exponential decay model
## Linear model
a1=lm(y1~x1)
## Exponential model
### Select appropriate starting values: Select an approximate $\theta$, since theta must be lower than min(y), and greater than zero
theta.0 <- min(y1) * 0.5
### Estimate the rest parameters using a linear model
model.0 <- lm(log(y1 - theta.0) ~ x1)
alpha.0 <- exp(coef(model.0)[1])
beta.0 <- coef(model.0)[2]
### Starting parameters
start<-list(alpha = alpha.0, beta = beta.0)
a2<- nls(y1 ~ alpha * exp(beta * x1) , start = start)
## Summary list AIC and BIC for model comparison
AIC(a1,a2)
## df AIC
## a1 3 0.3392432
## a2 3 -18.7277255
# df AIC
#a1 3 0.3392432
#a2 3 -18.7277255
BIC(a1,a2)
## df BIC
## a1 3 0.9309169
## a2 3 -18.1360518
# df BIC
#a1 3 0.9309169
#a2 3 -18.1360518
# ==> model a2 is best fit
## Plot predicted and measured values
plot(x1,y1)
lines(x1, predict(a2, list(x = x1)), col = 'skyblue', lwd = 3)
## Get parameter estimates
summary(a2)
##
## Formula: y1 ~ alpha * exp(beta * x1)
##
## Parameters:
## Estimate Std. Error t value Pr(>|t|)
## alpha 3.483344 0.708369 4.917 0.001719 **
## beta -0.021112 0.002708 -7.797 0.000107 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.06946 on 7 degrees of freedom
##
## Number of iterations to convergence: 5
## Achieved convergence tolerance: 2.277e-06
# alpha; 3.5, beta -0.021
# Model checking
## creat an environment in which the model checking plots are saved at high resolution
#output_file <- "decay_WT_fitting.jpeg"
#jpeg(filename = output_file, width = 8, height = 4, units = "in", res = 600)
# setting plotting environment
par(mfrow=c(1,2))
# qqplot
qqPlot(residuals(a2))
## [1] 4 3
### Normality of residuals
plot(residuals(a2)~fitted(a2))
abline(a=0, b=0, col="red")
#dev.off()
# Visualize decay period using a dotplot
ggplot(data1[data1$Genotype=="T101A",], aes(Sampling_time_min,Ratio))+
geom_point()
# x and y vector extracted from the WT dataset for model fitting
x2=data1[data1$Genotype=="T101A",]$Sampling_time_min
y2=data1[data1$Genotype=="T101A",]$Ratio
# Model selection
## Linear model
b1=lm(y2~x2)
## Exponential decay model
###Select appropriate starting values; Select an approximate $\theta$, since theta must be lower than min(y), and greater than zero
theta.0 <- min(y2) * 0.5
### Estimate the rest parameters using a linear model
model.0 <- lm(log(y2 - theta.0) ~ x2)
alpha.0 <- exp(coef(model.0)[1])
beta.0 <- coef(model.0)[2]
### Starting parameters
start=list(alpha = alpha.0, beta = beta.0)
b2 <- nls(y2 ~ alpha * exp(beta * x2), start = start)
### Select model based on lower AIC and BIC value
## Combine all AICs and BICs
AIC(b1,b2)
## df AIC
## b1 3 0.08641326
## b2 3 -12.04236649
# df AIC
# b1 3 0.08641326
# b2 3 -12.04236649
BIC(b1,b2)
## df BIC
## b1 3 0.678087
## b2 3 -11.450693
# df BIC
# b1 3 0.678087
# b2 3 -11.450693
# ==> Select model b2 due to lower AIC and BIC
# Plot fitted curve and data
plot(x2,y2)
lines(x2, predict(b2, list(x = x2)), col = 'skyblue', lwd = 3)
summary(b2)
##
## Formula: y2 ~ alpha * exp(beta * x2)
##
## Parameters:
## Estimate Std. Error t value Pr(>|t|)
## alpha 3.078747 0.904802 3.403 0.01140 *
## beta -0.019946 0.003831 -5.206 0.00124 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1007 on 7 degrees of freedom
##
## Number of iterations to convergence: 9
## Achieved convergence tolerance: 7.025e-06
# alpha: 3.08, beta= -0.0199
# Model checking for b3
## Create an environment in which the model validation plots are safed at high resolution
#output_file <- "decay_T101A_fitting.jpeg"
#jpeg(filename = output_file, width = 8, height = 4, units = "in", res = 600)
## Set up plot window
par(mfrow=c(1,2))
## QQPlot
qqPlot(residuals(b2))
## [1] 2 6
## Residuals vs fitted values
plot(residuals(b2)~fitted(b2))
abline(a=0, b=0, col="red")
#dev.off()
# Visualize decay using a dotplot
ggplot(data1[data1$Genotype=="G322D",], aes(Sampling_time_min,Ratio))+
geom_point()
# Remove outlier t210
x3=data1[data1$Genotype=="G322D" & data1$Sampling_time_min!=210,]$Sampling_time_min
y3=data1[data1$Genotype=="G322D" & data1$Sampling_time_min!=210,]$Ratio
# Model comparison between linear and exponential decay model
## Linear model
c1=lm(y3~x3)
## exponential model
### Select appropriate starting values
### Select an approximate $\theta$, since theta must be lower than min(y), and greater than zero
theta.0 <- min(y3) * 0.5
### Estimate the rest parameters using a linear model
model.0 <- lm(log(y3 - theta.0) ~ x3)
alpha.0 <- exp(coef(model.0)[1])
beta.0 <- coef(model.0)[2]
### Starting parameters
start <- list(alpha = alpha.0, beta = beta.0)
c2 <- nls(y3 ~ alpha * exp(beta * x3) , start = start)
### Summary all AICs and BICs
AIC(c1,c2)
## df AIC
## c1 3 -7.792185
## c2 3 -11.601528
# df AIC
# c1 3 -7.792185
# c2 4 -11.675159
BIC(c1,c2)
## df BIC
## c1 3 -8.416907
## c2 3 -12.226250
# df BIC
#c1 3 -8.416907
#c2 4 -12.2262
# ==> Model c2 selected based on lower AIC and BIC
## Plot predicted vs measured values
plot(x3,y3)
lines(x3, predict(c2, list(x = x3)), col = 'skyblue', lwd = 3)
# Extract parameter estimates
summary(c2)
##
## Formula: y3 ~ alpha * exp(beta * x3)
##
## Parameters:
## Estimate Std. Error t value Pr(>|t|)
## alpha 2.091772 0.308667 6.777 0.00247 **
## beta -0.006480 0.000862 -7.518 0.00168 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.06836 on 4 degrees of freedom
##
## Number of iterations to convergence: 5
## Achieved convergence tolerance: 3.111e-06
## alpha: 2.09, beta: -0.0065
# Model validation for c2
## Creat an evironment in which the validation plot can be saved at high resolution
#output_file <- "decay_G322D_fitting.jpeg"
#jpeg(filename = output_file, width = 8, height = 4, units = "in", res = 600)
# Plot setup
par(mfrow=c(1,2))
## qqplot
qqPlot(residuals(c2))
## [1] 5 3
## residuals vs fitted values plot
plot(residuals(c2)~fitted(c2))
abline(a=0, b=0, col="red")
#dev.off()
# Visualize decay using a dotplot
ggplot(data1[data1$Genotype=="S427*",], aes(Sampling_time_min,Ratio))+
geom_point()
# remove outlier t210
x4=data1[data1$Genotype=="S427*"&data1$Sampling_time_min!="210",]$Sampling_time_min
y4=data1[data1$Genotype=="S427*"& data1$Sampling_time_min!="210",]$Ratio
plot(x4,y4)
# Model selection between linear and exponential decay model
## Linear model
d1=lm(y4~x4)
## Exponential decay model: Select appropriate starting values; Select an approximate $\theta$, since theta must be lower than min(y), and greater than zero
theta.0 <- min(y4) * 0.5
### Estimate the rest parameters using a linear model
model.0 <- lm(log(y4 - theta.0) ~ x4)
alpha.0 <- exp(coef(model.0)[1])
beta.0 <- coef(model.0)[2]
### Starting parameters
start <- list(alpha = alpha.0, beta = beta.0)
d2 <- nls(y4 ~ alpha * exp(beta * x4) , start = start)
### Model comparison to chose the best fit model using AIC and BIC
AIC(d1,d2)
## df AIC
## d1 3 -3.548757
## d2 3 -5.901902
# df AIC
#d1 3 -3.548757
#d2 3 -5.901902
BIC(d1,d2)
## df BIC
## d1 3 -4.173478
## d2 3 -6.526623
# df BIC
# d1 3 -4.173478
# d2 3 -6.526623
# ==> Model d2 selected based on the lower AIC and BIC
## Extract parameter estimates
summary(d2)
##
## Formula: y4 ~ alpha * exp(beta * x4)
##
## Parameters:
## Estimate Std. Error t value Pr(>|t|)
## alpha 1.745038 0.407150 4.286 0.0128 *
## beta -0.005442 0.001313 -4.146 0.0143 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1099 on 4 degrees of freedom
##
## Number of iterations to convergence: 6
## Achieved convergence tolerance: 2.651e-06
## alpha: 1.75, beta: -0.0054
# Model validation for d2
## Create an evironment to save validation plots at high resolution
#output_file <- "decay_S427_fitting.jpeg"
#jpeg(filename = output_file, width = 8, height = 4, units = "in", res = 600)
## Plot layout
par(mfrow=c(1,2))
## qqplot
qqPlot(residuals(d2))
## [1] 3 6
## residuals vs fitted values plot
plot(residuals(d2)~fitted(d2))
abline(a=0, b=0, col="red")
#dev.off()
(Figure Fig. 5B)
# Visualize decay data using dot and line plots
a=ggplot()+
geom_point(aes(x=x1,y=y1),color="#252525")+ # measured decay ancestor
geom_point(aes(x=x2,y=y2),color="#ae017e")+ # measured decay F(T101A)
geom_point(aes(x=x3,y=y3),color="#c6d325")+ # measured decay F(G322D)
geom_point(aes(x=x4,y=y4),color="#ef7c00")+ # measured decay F(S427*)
annotate(geom="text", x=250, y=1.00, hjust=0, label=expression("Decay rate (min"^{-1}*"):"),
color="black", fontface="bold", size=5)+ # Decay rate text
annotate(geom="text", x=250, y=0.92, hjust=0,label=expression("F(G322D): 6.5*10"^{-3}*""),
color="#c6d325", fontface="bold", size=5)+ # Decay rate text F(G322D)
annotate(geom="text", x=250, y=0.85, hjust=0,label=expression("F(S427*): 5.4*10"^{-3}*""),
color="#ef7c00", fontface="bold", size=5)+# Decay rate text F(S427*)
annotate(geom="text", x=250, y=0.78, hjust=0,label=expression("F(T101A): 2.0*10"^{-2}*""),
color="#ae017e", fontface="bold", size=5)+# Decay rate text F(T101A)
annotate(geom="text", x=250, y=0.71, hjust=0,label=expression("Ancestor: 2.1*10"^{-2}*""),
color="#252525", fontface="bold", size=5)+ # Decay rate text ancestor
geom_line(aes(x=x1, y=predict(a2)),color="#252525", size=0.75)+ # predicted decay ancestor
geom_line(aes(x=x2, y=predict(b2)),color="#ae017e", size=0.75)+ # predicted decay F(T101A)
geom_line(aes(x=x3, y=predict(c2)),color="#c6d325", linetype="dashed",size=0.75)+ # predicted decay F(G322D)
geom_line(aes(x=x4, y=predict(d2)),color="#ef7c00", linetype="dashed",size=0.75)+ # predicted decay F(S427*)
xlab("Elapsed time since infection (min)")+ # label x-axis
ylab("Remaining fraction of infectious phage particles")+ # label y-axis
theme_bw()+ # plot layout
theme(axis.text=element_text(size=12),
axis.title=element_text(size=14)) # font size
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
a
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'
# Save plot at high resolution
# ggsave("decay_plot.jpeg", plot = a, dpi = 600, width = 9, height = 7)
Code used to generate Fig. S3 in the manuscript
READ ME Specification of columns in data set
Genotype: Phage genotype for which OSGC was done: wild type (WT)
MOI: Multiplicity of infectiousness input when adsorption is initiated
Sampling_time_min: Sampling time point in minutes after the 6-minute synchronised adsorption period, followed by a 2000x dilution step
PFU_1: Plaque-forming unit counts (PFU) for the first plating of triplicate plating for phage lysate for each sampling time point
PFU_2: Plaque-forming unit counts (PFU) for the second plating of triplicate plating for phage lysate for each sampling time point
PFU_3: Plaque-forming unit counts (PFU) for the third plating of triplicate plating for phage lysate for each sampling time point
Median_PFU: Median PFU counts from triplicate plating PFU_1, PFU_2, PFU_3. If triplicate plating was not possible, the mean of two platings or just one plating value was taken for phage titer calculation. The reason for this was not enough sampling volume to do triplicate plating (i.e. at early time points).
Dilution_PFU: Plating dilution used to plate phage lysate isolated after the one-step growth curve using chloroform in a plaque assay
Dilution_OSGC: Dilution step between adsorption and OSGC sampling to prevent secondary infection. The dilution step for all three one-step runs was a 2000-fold dilution using 10 µl of bacteria-phage mix after adsorption and diluting it into 20 ml supplemented, prewarmed (37 °C) LB medium.
PFU_ml: Phage concentration in plaque-forming units per ml (PFU/ml), calculated from Median_PFU *Dilution_PFU
t0_PFU_ml: Phage concentration in PFU/ml at time point t0 after adsorption. These are free phages which need to be subtracted from each following time point phage titer to normalise the data for free/unadsorbed phages.
PFU_t0_norm: Normalised PFU concentration for each time point calculated by PFU_ml - t0_PFU_ml
Bacteria_CFU_ml: Bacteria concentration in colony-forming units per ml CFU/ml) at the start of the adsorption period. Calculated by counting colony-forming units (CFU)* Plating dilution (10^6)
Infected_CFU_ml: Number of Bacteria which could be in theory infected by phages, calculated by Bacteria_CFU_ml*MOI/Dilution_OSGC
PFU_ml_produced: Mean phage burst size at each sampling time point
Elapsed_time: Elapsed time since adsorption. Calculated by adding 6 min adsorption to each “Sampling_time_ min” OSGC sampling time point
df=read.csv("experiment_7_OSGC_WT.csv")
# df has 11 observations and 16 variables
str(df) # structure of columns
## 'data.frame': 11 obs. of 16 variables:
## $ Genotype : chr "WT" "WT" "WT" "WT" ...
## $ MOI : num 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 ...
## $ Sampling_time_min: int 1 2 3 4 5 6 8 10 15 20 ...
## $ PFU_1 : num 17 26 6 12 134 ...
## $ PFU_2 : int 11 18 NA 11 148 119 23 33 15 16 ...
## $ PFU_3 : int NA NA NA NA 136 129 14 65 18 21 ...
## $ Median_PFU : num 14 22 6 11.5 136 129 23 65 18 21 ...
## $ Dilution_PFU : num 2.5 2.5 3.3 5.0 1.0e+01 1.0e+02 1.0e+04 1.0e+04 1.0e+05 1.0e+05 ...
## $ Dilution_OSGC : int 2000 2000 2000 2000 2000 2000 2000 2000 2000 2000 ...
## $ PFU_ml : num 3.50e+01 5.50e+01 1.98e+01 5.75e+01 1.36e+03 1.29e+04 2.30e+05 6.50e+05 1.80e+06 2.10e+06 ...
## $ t0_PFU_ml : num 35 35 35 35 35 35 35 35 35 35 ...
## $ PFU_t0_norm : num 0 20 -15.2 22.5 1330 12900 230000 650000 1800000 2100000 ...
## $ Bacteria_CFU_ml : num 2.81e+08 2.81e+08 2.81e+08 2.81e+08 2.81e+08 2.81e+08 2.81e+08 2.81e+08 2.81e+08 2.81e+08 ...
## $ Infected_CFU_ml : num 14100 14100 14100 14100 14100 14100 14100 14100 14100 14100 ...
## $ PFU_ml_produced : num 0 0 0 0 0.09 ...
## $ Elapsed_time : int 7 8 9 10 11 12 14 16 21 26 ...
ggplot(df, aes(Elapsed_time,PFU_ml_produced))+
geom_point(size=2)+ # dot plot
scale_y_continuous(trans = scales::pseudo_log_trans(),
breaks = c(0, 10^(0:6)))+ # log transformed y axis
xlim(0,35)+ # set x limits so time point 0-6 min (adsorption time) are also visualized
theme_bw() # plot layout
# Subset data frame to columns "Elapsed time" and "PFU_ml_produced"
WT=df[,c(16,15)]
## For model fitting, first compare exponential growth with logistic growth model to determine "best-fit" model
### Exponential increase model (beta= constant, alpha=growth rate)
model1<-nls(PFU_ml_produced~(beta+exp(alpha*Elapsed_time)),
data=WT, start=list(beta=max(WT$PFU_ml_produced),alpha=0.5))
### Logistic growth model
model2 <- nls(PFU_ml_produced ~ SSlogis(Elapsed_time, b, l, scal), data=WT,
start = list(b=max(WT$PFU_ml_produced),l=mean(WT$Elapsed_time),scal=1))
## Model comparison based on AIC and BIC
AIC(model1,model2)
## df AIC
## model1 3 117.03264
## model2 4 73.82167
# df AIC
#model1 3 117.03
#model2 4 73.82
BIC(model1,model2)
## df BIC
## model1 3 118.22633
## model2 4 75.41325
#df BIC
#model1 3 118.23
#model2 4 75.41
### --> Select model 2 [logistic growth] due to the lower AIC and BIC value ###
## Extract parameters
summary(model2)
##
## Formula: PFU_ml_produced ~ SSlogis(Elapsed_time, b, l, scal)
##
## Parameters:
## Estimate Std. Error t value Pr(>|t|)
## b 138.0512 3.8516 35.842 4.02e-10 ***
## l 17.0011 0.3487 48.757 3.46e-11 ***
## scal 1.4321 0.2583 5.545 0.000544 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.653 on 8 degrees of freedom
##
## Number of iterations to convergence: 6
## Achieved convergence tolerance: 8.576e-06
#Parameters:
#b 138.0512 3.8516 35.842 4.02e-10 ***
#l 17.0011 0.3487 48.757 3.46e-11 ***
## Results##
# burst size b = 138, lysis time l= 17 minutes
## Model validation using a qqplot and residuals vs fitted plot (Supplementary S3 B and C)
# Set the output file name to export plots at high resolution
output_file <- "model_check_OSGC.jpeg"
# Open a PNG device
#jpeg(filename = output_file, width = 8, height = 4, units = "in", res = 600)
#par(mfrow=c(1,2))
qqPlot(residuals(model2)) # looks good
## [1] 10 11
plot(residuals(model2)~fitted(model2))
abline(a=0, b=0, col="red") # ok
#dev.off()
## Visualize model fit for one-step growth curve (Supplementary S3 A)
x=ggplot()+
geom_point(data=WT,aes(x=Elapsed_time,y=PFU_ml_produced), color="#252525")+ #raw data of produced phages per time step
geom_line(data=WT,aes(x=Elapsed_time,y=predict(model2)),color="#252525", linetype="dashed",size=0.8)+ # model fit of model 2
annotate(geom="text", x=6, y=140, hjust=0,label="lysis time: 17 min",
color="black")+ # add lysis time value as text
annotate(geom="text", x=6, y=130, hjust=0,label="burst size: 138 per host cell",
color="black")+ # add burst size value as text
xlab("Elased time since adsorption initiation (min)")+ # label x-axis
ylab("Produced infectious phage particles per ml")+ # label y-axis
theme_bw()+ # plot layout
theme(axis.text=element_text(size=12),
axis.title=element_text(size=14)) # font size
x
# Export one step plot at high resolution
# ggsave("OSGC_WT.jpeg", plot = x, device = "jpeg", dpi = 600, width = 8, height = 6)